Base code

This commit is contained in:
Kunthawat Greethong
2026-01-08 22:39:53 +07:00
parent 697115c61a
commit c35fa52117
2169 changed files with 626670 additions and 0 deletions

View File

@@ -0,0 +1,55 @@
"""
Research Services Module for ALwrity
This module provides research and grounding capabilities for content generation,
replacing mock research with real-time industry information.
Available Services:
- GoogleSearchService: Real-time industry research using Google Custom Search API
- ExaService: Competitor discovery and analysis using Exa API
- TavilyService: AI-powered web search with real-time information
- Source ranking and credibility assessment
- Content extraction and insight generation
Core Module (v2.0):
- ResearchEngine: Standalone AI research engine for any content tool
- ResearchContext: Unified input schema for research requests
- ParameterOptimizer: AI-driven parameter optimization
Author: ALwrity Team
Version: 2.0
Last Updated: December 2025
"""
from .google_search_service import GoogleSearchService
from .exa_service import ExaService
from .tavily_service import TavilyService
# Core Research Engine (v2.0)
from .core import (
ResearchEngine,
ResearchContext,
ResearchPersonalizationContext,
ContentType,
ResearchGoal,
ResearchDepth,
ProviderPreference,
ParameterOptimizer,
)
__all__ = [
# Legacy services (still used by blog writer)
"GoogleSearchService",
"ExaService",
"TavilyService",
# Core Research Engine (v2.0)
"ResearchEngine",
"ResearchContext",
"ResearchPersonalizationContext",
"ContentType",
"ResearchGoal",
"ResearchDepth",
"ProviderPreference",
"ParameterOptimizer",
]

View File

@@ -0,0 +1,270 @@
"""
AI Prompts for Competitor Analysis
This module contains prompts for analyzing competitor data from Exa API
to generate actionable insights for content strategy and competitive positioning.
"""
COMPETITOR_ANALYSIS_PROMPT = """
You are a competitive intelligence analyst specializing in content strategy and market positioning.
**TASK**: Analyze competitor data to provide actionable insights for content strategy and competitive positioning.
**COMPETITOR DATA**:
{competitor_context}
**USER'S WEBSITE**: {user_url}
**INDUSTRY CONTEXT**: {industry_context}
**ANALYSIS REQUIREMENTS**:
1. **Market Position Analysis**
- Identify the competitive landscape structure
- Determine market leaders vs. challengers
- Assess market saturation and opportunities
2. **Content Strategy Insights**
- Analyze competitor content themes and topics
- Identify content gaps and opportunities
- Suggest unique content angles for differentiation
3. **Competitive Advantages**
- Highlight what makes each competitor unique
- Identify areas where the user can differentiate
- Suggest positioning strategies
4. **SEO and Marketing Insights**
- Analyze competitor positioning and messaging
- Identify keyword and content opportunities
- Suggest marketing strategies
**OUTPUT FORMAT** (JSON):
{{
"market_analysis": {{
"competitive_landscape": "Description of market structure",
"market_leaders": ["List of top 3 competitors"],
"market_opportunities": ["List of 3-5 opportunities"],
"saturation_level": "high/medium/low"
}},
"content_strategy": {{
"common_themes": ["List of common content themes"],
"content_gaps": ["List of 5 content opportunities"],
"unique_angles": ["List of 3 unique content angles"],
"content_frequency_insights": "Analysis of publishing patterns"
}},
"competitive_positioning": {{
"differentiation_opportunities": ["List of 5 ways to differentiate"],
"unique_value_propositions": ["List of 3 unique positioning ideas"],
"target_audience_insights": "Analysis of competitor audience targeting"
}},
"seo_opportunities": {{
"keyword_gaps": ["List of 5 keyword opportunities"],
"content_topics": ["List of 5 high-value content topics"],
"marketing_channels": ["List of competitor marketing strategies"]
}},
"actionable_recommendations": [
"List of 5 specific, actionable recommendations"
],
"risk_assessment": {{
"competitive_threats": ["List of 3 main threats"],
"market_barriers": ["List of 2-3 barriers to entry"],
"success_factors": ["List of 3 key success factors"]
}}
}}
**INSTRUCTIONS**:
- Be specific and actionable in your recommendations
- Focus on opportunities for differentiation
- Consider the user's industry context
- Prioritize recommendations by impact and feasibility
- Use data from the competitor analysis to support insights
- Keep recommendations practical and implementable
**QUALITY STANDARDS**:
- Each recommendation should be specific and actionable
- Insights should be based on actual competitor data
- Focus on differentiation and competitive advantage
- Consider both short-term and long-term strategies
- Ensure recommendations are relevant to the user's industry
"""
CONTENT_GAP_ANALYSIS_PROMPT = """
You are a content strategist analyzing competitor content to identify gaps and opportunities.
**TASK**: Analyze competitor content patterns to identify content gaps and opportunities.
**COMPETITOR CONTENT DATA**:
{competitor_context}
**USER'S INDUSTRY**: {industry_context}
**TARGET AUDIENCE**: {target_audience}
**ANALYSIS FOCUS**:
1. **Content Topic Analysis**
- Identify most common content topics across competitors
- Find underserved or missing topics
- Analyze content depth and quality patterns
2. **Content Format Opportunities**
- Identify popular content formats among competitors
- Find format gaps and opportunities
- Suggest innovative content approaches
3. **Audience Targeting Gaps**
- Analyze competitor audience targeting
- Identify underserved audience segments
- Suggest audience expansion opportunities
4. **SEO Content Opportunities**
- Identify high-value keywords competitors are missing
- Find long-tail keyword opportunities
- Suggest content clusters for SEO
**OUTPUT FORMAT** (JSON):
{{
"content_gaps": [
{{
"topic": "Specific content topic",
"opportunity_level": "high/medium/low",
"reasoning": "Why this is an opportunity",
"content_angle": "Unique angle for this topic",
"estimated_difficulty": "easy/medium/hard"
}}
],
"format_opportunities": [
{{
"format": "Content format type",
"gap_reason": "Why competitors aren't using this",
"potential_impact": "Expected impact level",
"implementation_tips": "How to implement"
}}
],
"audience_gaps": [
{{
"audience_segment": "Underserved audience",
"opportunity_size": "large/medium/small",
"content_needs": "What content this audience needs",
"engagement_strategy": "How to engage this audience"
}}
],
"seo_opportunities": [
{{
"keyword_theme": "Keyword cluster theme",
"search_volume": "estimated_high/medium/low",
"competition_level": "low/medium/high",
"content_ideas": ["3-5 content ideas for this theme"]
}}
],
"priority_recommendations": [
"Top 5 prioritized content opportunities with implementation order"
]
}}
"""
COMPETITIVE_INTELLIGENCE_PROMPT = """
You are a competitive intelligence expert providing strategic insights for market positioning.
**TASK**: Generate comprehensive competitive intelligence insights for strategic decision-making.
**COMPETITOR INTELLIGENCE DATA**:
{competitor_context}
**BUSINESS CONTEXT**:
- User Website: {user_url}
- Industry: {industry_context}
- Business Model: {business_model}
- Target Market: {target_market}
**INTELLIGENCE AREAS**:
1. **Competitive Landscape Mapping**
- Market positioning analysis
- Competitive strength assessment
- Market share estimation
2. **Strategic Positioning Opportunities**
- Blue ocean opportunities
- Differentiation strategies
- Competitive moats
3. **Threat Assessment**
- Competitive threats
- Market disruption risks
- Barrier to entry analysis
4. **Growth Strategy Insights**
- Market expansion opportunities
- Partnership possibilities
- Acquisition targets
**OUTPUT FORMAT** (JSON):
{{
"competitive_landscape": {{
"market_structure": "Description of market structure",
"key_players": [
{{
"name": "Competitor name",
"position": "market_leader/challenger/niche",
"strengths": ["List of key strengths"],
"weaknesses": ["List of key weaknesses"],
"market_share": "estimated_percentage"
}}
],
"market_dynamics": "Analysis of market trends and forces"
}},
"positioning_opportunities": {{
"blue_ocean_opportunities": ["List of uncontested market spaces"],
"differentiation_strategies": ["List of positioning strategies"],
"competitive_advantages": ["List of potential advantages to build"]
}},
"threat_analysis": {{
"immediate_threats": ["List of current competitive threats"],
"future_risks": ["List of potential future risks"],
"market_barriers": ["List of barriers to success"]
}},
"strategic_recommendations": {{
"short_term_actions": ["List of 3-5 immediate actions"],
"medium_term_strategy": ["List of 3-5 strategic initiatives"],
"long_term_vision": ["List of 2-3 long-term strategic goals"]
}},
"success_metrics": {{
"kpis_to_track": ["List of key performance indicators"],
"competitive_benchmarks": ["List of metrics to benchmark against"],
"success_thresholds": ["List of success criteria"]
}}
}}
"""
# Utility function to format prompts with data
def format_competitor_analysis_prompt(competitor_context: str, user_url: str, industry_context: str = None) -> str:
    """Render COMPETITOR_ANALYSIS_PROMPT with the supplied data.

    An empty or missing industry_context is rendered as "Not specified".
    """
    substitutions = {
        "competitor_context": competitor_context,
        "user_url": user_url,
        "industry_context": industry_context or "Not specified",
    }
    return COMPETITOR_ANALYSIS_PROMPT.format(**substitutions)
def format_content_gap_prompt(competitor_context: str, industry_context: str = None, target_audience: str = None) -> str:
    """Render CONTENT_GAP_ANALYSIS_PROMPT with the supplied data.

    Empty or missing optional fields are rendered as "Not specified".
    """
    substitutions = {
        "competitor_context": competitor_context,
        "industry_context": industry_context or "Not specified",
        "target_audience": target_audience or "Not specified",
    }
    return CONTENT_GAP_ANALYSIS_PROMPT.format(**substitutions)
def format_competitive_intelligence_prompt(
    competitor_context: str,
    user_url: str,
    industry_context: str = None,
    business_model: str = None,
    target_market: str = None
) -> str:
    """Render COMPETITIVE_INTELLIGENCE_PROMPT with the supplied business data.

    Every empty or missing optional field is rendered as "Not specified".
    """
    substitutions = {
        "competitor_context": competitor_context,
        "user_url": user_url,
        "industry_context": industry_context or "Not specified",
        "business_model": business_model or "Not specified",
        "target_market": target_market or "Not specified",
    }
    return COMPETITIVE_INTELLIGENCE_PROMPT.format(**substitutions)

View File

@@ -0,0 +1,51 @@
"""
Research Engine Core Module
This is the standalone AI Research Engine that can be imported by
Blog Writer, Podcast Maker, YouTube Creator, and other ALwrity tools.
Design Goals:
- Tool-agnostic: Any content tool can import and use this
- AI-driven parameter optimization: Users don't need to understand Exa/Tavily internals
- Provider priority: Exa → Tavily → Google (fallback)
- Personalization-aware: Accepts context from calling tools
- Advanced by default: Prioritizes quality over speed
Usage:
from services.research.core import ResearchEngine, ResearchContext
engine = ResearchEngine()
result = await engine.research(ResearchContext(
query="AI trends in healthcare 2025",
content_type=ContentType.BLOG,
persona_context={"industry": "Healthcare", "audience": "Medical professionals"}
))
Author: ALwrity Team
Version: 2.0
Last Updated: December 2025
"""
from .research_context import (
ResearchContext,
ResearchPersonalizationContext,
ContentType,
ResearchGoal,
ResearchDepth,
ProviderPreference,
)
from .parameter_optimizer import ParameterOptimizer
from .research_engine import ResearchEngine
__all__ = [
# Context schemas
"ResearchContext",
"ResearchPersonalizationContext",
"ContentType",
"ResearchGoal",
"ResearchDepth",
"ProviderPreference",
# Core classes
"ParameterOptimizer",
"ResearchEngine",
]

View File

@@ -0,0 +1,384 @@
"""
AI Parameter Optimizer for Research Engine
Uses AI to analyze the research query and context to select optimal
parameters for Exa and Tavily APIs. This abstracts the complexity
from non-technical users.
Key Decisions:
- Provider selection (Exa vs Tavily vs Google)
- Search type (neural vs keyword)
- Category/topic selection
- Depth and result limits
- Domain filtering
Author: ALwrity Team
Version: 2.0
"""
import os
import re
from typing import Dict, Any, Optional, Tuple
from loguru import logger
from .research_context import (
ResearchContext,
ResearchGoal,
ResearchDepth,
ProviderPreference,
ContentType,
)
from models.blog_models import ResearchConfig, ResearchProvider, ResearchMode
class ParameterOptimizer:
    """
    AI-driven parameter optimization for research providers.

    Analyzes the research context and selects optimal parameters
    for Exa, Tavily, or Google without requiring user expertise.

    Entry point is :meth:`optimize`, which returns a
    ``(ResearchProvider, ResearchConfig)`` pair ready for execution.
    """

    # Query patterns for intelligent routing.
    # NOTE(review): the year tokens (2024|2025) are hard-coded — refresh them
    # when a new year becomes "current", or trending detection will quietly
    # degrade.
    TRENDING_PATTERNS = [
        r'\b(latest|recent|new|2024|2025|current|trending|news)\b',
        r'\b(update|announcement|launch|release)\b',
    ]
    TECHNICAL_PATTERNS = [
        r'\b(api|sdk|framework|library|implementation|architecture)\b',
        r'\b(code|programming|developer|technical|engineering)\b',
    ]
    COMPETITIVE_PATTERNS = [
        r'\b(competitor|alternative|vs|versus|compare|comparison)\b',
        r'\b(market|industry|landscape|players)\b',
    ]
    FACTUAL_PATTERNS = [
        r'\b(statistics|data|research|study|report|survey)\b',
        r'\b(percent|percentage|number|figure|metric)\b',
    ]

    # Exa category mapping based on query analysis
    EXA_CATEGORY_MAP = {
        'research': 'research paper',
        'news': 'news',
        'company': 'company',
        'personal': 'personal site',
        'github': 'github',
        'linkedin': 'linkedin profile',
        'finance': 'financial report',
    }

    # Tavily topic mapping
    TAVILY_TOPIC_MAP = {
        ResearchGoal.TRENDING: 'news',
        ResearchGoal.FACTUAL: 'general',
        ResearchGoal.COMPETITIVE: 'general',
        ResearchGoal.TECHNICAL: 'general',
        ResearchGoal.EDUCATIONAL: 'general',
        ResearchGoal.INSPIRATIONAL: 'general',
    }

    # Shared ResearchDepth -> ResearchMode mapping, used by both the optimized
    # and the advanced (raw) config builders so the two paths cannot drift.
    # QUICK/STANDARD collapse to BASIC; COMPREHENSIVE/EXPERT to COMPREHENSIVE.
    _DEPTH_TO_MODE = {
        ResearchDepth.QUICK: ResearchMode.BASIC,
        ResearchDepth.STANDARD: ResearchMode.BASIC,
        ResearchDepth.COMPREHENSIVE: ResearchMode.COMPREHENSIVE,
        ResearchDepth.EXPERT: ResearchMode.COMPREHENSIVE,
    }

    def __init__(self):
        """Initialize the optimizer, detecting provider availability from env keys."""
        self.exa_available = bool(os.getenv("EXA_API_KEY"))
        self.tavily_available = bool(os.getenv("TAVILY_API_KEY"))
        logger.info(f"ParameterOptimizer initialized: exa={self.exa_available}, tavily={self.tavily_available}")

    def optimize(self, context: ResearchContext) -> Tuple[ResearchProvider, ResearchConfig]:
        """
        Analyze research context and return optimized provider and config.

        Args:
            context: The research context from the calling tool

        Returns:
            Tuple of (selected_provider, optimized_config)
        """
        # Advanced mode bypasses analysis and honors raw user parameters.
        if context.advanced_mode:
            return self._build_advanced_config(context)
        # Analyze query to determine optimal approach
        query_analysis = self._analyze_query(context.query)
        # Select provider based on analysis and preferences
        provider = self._select_provider(context, query_analysis)
        # Build optimized config for selected provider
        config = self._build_config(context, provider, query_analysis)
        logger.info(f"Optimized research: provider={provider.value}, mode={config.mode.value}")
        return provider, config

    def _analyze_query(self, query: str) -> Dict[str, Any]:
        """
        Analyze the query to understand intent and optimal approach.

        Returns dict with:
        - is_trending: Query is about recent/current events
        - is_technical: Query is technical in nature
        - is_competitive: Query is about competition/comparison
        - is_factual: Query needs data/statistics
        - suggested_category: Exa category if applicable (may stay None)
        - suggested_topic: Tavily topic ('general', 'news', or 'finance')
        - suggested_search_type: 'auto', 'neural', or 'keyword'
        """
        query_lower = query.lower()
        analysis = {
            'is_trending': self._matches_patterns(query_lower, self.TRENDING_PATTERNS),
            'is_technical': self._matches_patterns(query_lower, self.TECHNICAL_PATTERNS),
            'is_competitive': self._matches_patterns(query_lower, self.COMPETITIVE_PATTERNS),
            'is_factual': self._matches_patterns(query_lower, self.FACTUAL_PATTERNS),
            'suggested_category': None,
            'suggested_topic': 'general',
            'suggested_search_type': 'auto',
        }
        # Determine Exa category — first matching rule wins.
        if 'research' in query_lower or 'study' in query_lower or 'paper' in query_lower:
            analysis['suggested_category'] = 'research paper'
        elif 'github' in query_lower or 'repository' in query_lower:
            analysis['suggested_category'] = 'github'
        elif 'linkedin' in query_lower or 'professional' in query_lower:
            analysis['suggested_category'] = 'linkedin profile'
        elif analysis['is_trending']:
            analysis['suggested_category'] = 'news'
        elif 'company' in query_lower or 'startup' in query_lower:
            analysis['suggested_category'] = 'company'
        # Determine Tavily topic
        if analysis['is_trending']:
            analysis['suggested_topic'] = 'news'
        elif 'finance' in query_lower or 'stock' in query_lower or 'investment' in query_lower:
            analysis['suggested_topic'] = 'finance'
        else:
            analysis['suggested_topic'] = 'general'
        # Determine search type
        if analysis['is_technical'] or analysis['is_factual']:
            analysis['suggested_search_type'] = 'neural'  # Better for semantic understanding
        elif analysis['is_trending']:
            analysis['suggested_search_type'] = 'keyword'  # Better for current events
        return analysis

    def _matches_patterns(self, text: str, patterns: list) -> bool:
        """Return True if *text* matches any regex in *patterns* (case-insensitive)."""
        return any(re.search(pattern, text, re.IGNORECASE) for pattern in patterns)

    def _select_provider(self, context: ResearchContext, analysis: Dict[str, Any]) -> ResearchProvider:
        """
        Select the optimal provider based on context and query analysis.

        Priority: Exa → Tavily → Google for ALL modes (including basic).
        This provides better semantic search results for content creators.
        Exa's neural search excels at understanding context and meaning,
        which is valuable for all research types, not just technical queries.
        """
        preference = context.provider_preference
        # If user explicitly requested a provider, respect that (falling
        # through to AUTO selection when the requested one is unavailable).
        if preference == ProviderPreference.EXA:
            if self.exa_available:
                return ResearchProvider.EXA
            logger.warning("Exa requested but not available, falling back")
        if preference == ProviderPreference.TAVILY:
            if self.tavily_available:
                return ResearchProvider.TAVILY
            logger.warning("Tavily requested but not available, falling back")
        if preference == ProviderPreference.GOOGLE:
            return ResearchProvider.GOOGLE
        # AUTO mode: Always prefer Exa → Tavily → Google
        # Exa provides superior semantic search for all content types
        if self.exa_available:
            logger.info(
                f"Selected Exa (primary provider): query analysis shows "
                f"technical={analysis.get('is_technical', False)}, "
                f"trending={analysis.get('is_trending', False)}"
            )
            return ResearchProvider.EXA
        # Tavily as secondary option - good for real-time and news
        if self.tavily_available:
            logger.info(
                f"Selected Tavily (secondary): Exa unavailable, "
                f"trending={analysis.get('is_trending', False)}"
            )
            return ResearchProvider.TAVILY
        # Google grounding as fallback
        logger.info("Selected Google (fallback): Exa and Tavily unavailable")
        return ResearchProvider.GOOGLE

    def _build_config(
        self,
        context: ResearchContext,
        provider: ResearchProvider,
        analysis: Dict[str, Any]
    ) -> ResearchConfig:
        """Build optimized ResearchConfig for the selected provider."""
        # Map ResearchDepth to ResearchMode via the shared class-level table.
        mode = self._DEPTH_TO_MODE.get(context.depth, ResearchMode.BASIC)
        # Base config
        config = ResearchConfig(
            mode=mode,
            provider=provider,
            max_sources=context.max_sources,
            include_statistics=context.personalization.include_statistics if context.personalization else True,
            include_expert_quotes=context.personalization.include_expert_quotes if context.personalization else True,
            include_competitors=analysis['is_competitive'],
            include_trends=analysis['is_trending'],
        )
        # Provider-specific optimizations
        if provider == ResearchProvider.EXA:
            config = self._optimize_exa_config(config, context, analysis)
        elif provider == ResearchProvider.TAVILY:
            config = self._optimize_tavily_config(config, context, analysis)
        # Apply domain filters (truncated to Tavily's documented limits).
        if context.include_domains:
            if provider == ResearchProvider.EXA:
                config.exa_include_domains = context.include_domains
            elif provider == ResearchProvider.TAVILY:
                config.tavily_include_domains = context.include_domains[:300]  # Tavily limit
        if context.exclude_domains:
            if provider == ResearchProvider.EXA:
                config.exa_exclude_domains = context.exclude_domains
            elif provider == ResearchProvider.TAVILY:
                config.tavily_exclude_domains = context.exclude_domains[:150]  # Tavily limit
        return config

    def _optimize_exa_config(
        self,
        config: ResearchConfig,
        context: ResearchContext,
        analysis: Dict[str, Any]
    ) -> ResearchConfig:
        """Add Exa-specific optimizations (category and search type)."""
        # Set category based on analysis
        if analysis['suggested_category']:
            config.exa_category = analysis['suggested_category']
        # Set search type
        config.exa_search_type = analysis.get('suggested_search_type', 'auto')
        # For comprehensive research, use neural search
        if context.depth in [ResearchDepth.COMPREHENSIVE, ResearchDepth.EXPERT]:
            config.exa_search_type = 'neural'
        return config

    def _optimize_tavily_config(
        self,
        config: ResearchConfig,
        context: ResearchContext,
        analysis: Dict[str, Any]
    ) -> ResearchConfig:
        """Add Tavily-specific optimizations (topic, depth, recency, answer)."""
        # Set topic based on analysis
        config.tavily_topic = analysis.get('suggested_topic', 'general')
        # Set search depth based on research depth
        if context.depth in [ResearchDepth.COMPREHENSIVE, ResearchDepth.EXPERT]:
            config.tavily_search_depth = 'advanced'  # 2 credits, but better results
            config.tavily_chunks_per_source = 3
        else:
            config.tavily_search_depth = 'basic'  # 1 credit
        # Set time range based on recency; unknown values pass through as-is.
        if context.recency:
            recency_map = {
                'day': 'd',
                'week': 'w',
                'month': 'm',
                'year': 'y',
            }
            config.tavily_time_range = recency_map.get(context.recency, context.recency)
        elif analysis['is_trending']:
            config.tavily_time_range = 'w'  # Last week for trending topics
        # Include answer for comprehensive research
        if context.depth in [ResearchDepth.COMPREHENSIVE, ResearchDepth.EXPERT]:
            config.tavily_include_answer = 'advanced'
        # Include raw content for expert depth
        if context.depth == ResearchDepth.EXPERT:
            config.tavily_include_raw_content = 'markdown'
        return config

    def _build_advanced_config(self, context: ResearchContext) -> Tuple[ResearchProvider, ResearchConfig]:
        """
        Build config from raw advanced parameters.

        Used when advanced_mode=True and user wants full control.
        Provider is inferred from which raw parameters were supplied, then an
        explicit provider_preference (if any) overrides that inference.
        """
        # Determine provider from explicit parameters
        provider = ResearchProvider.GOOGLE
        if context.exa_category or context.exa_search_type:
            provider = ResearchProvider.EXA if self.exa_available else ResearchProvider.GOOGLE
        elif context.tavily_topic or context.tavily_search_depth:
            provider = ResearchProvider.TAVILY if self.tavily_available else ResearchProvider.GOOGLE
        # Check preference override
        if context.provider_preference == ProviderPreference.EXA and self.exa_available:
            provider = ResearchProvider.EXA
        elif context.provider_preference == ProviderPreference.TAVILY and self.tavily_available:
            provider = ResearchProvider.TAVILY
        elif context.provider_preference == ProviderPreference.GOOGLE:
            provider = ResearchProvider.GOOGLE
        # Map depth to mode via the shared class-level table.
        mode = self._DEPTH_TO_MODE.get(context.depth, ResearchMode.BASIC)
        # Build config with raw parameters
        config = ResearchConfig(
            mode=mode,
            provider=provider,
            max_sources=context.max_sources,
            # Exa
            exa_category=context.exa_category,
            exa_search_type=context.exa_search_type,
            exa_include_domains=context.include_domains,
            exa_exclude_domains=context.exclude_domains,
            # Tavily
            tavily_topic=context.tavily_topic,
            tavily_search_depth=context.tavily_search_depth,
            tavily_include_domains=context.include_domains[:300] if context.include_domains else [],
            tavily_exclude_domains=context.exclude_domains[:150] if context.exclude_domains else [],
            tavily_include_answer=context.tavily_include_answer,
            tavily_include_raw_content=context.tavily_include_raw_content,
            tavily_time_range=context.tavily_time_range,
            tavily_country=context.tavily_country,
        )
        logger.info(f"Advanced config: provider={provider.value}, mode={mode.value}")
        return provider, config

View File

@@ -0,0 +1,198 @@
"""
Research Context Schema
Defines the unified input schema for the Research Engine.
Any tool (Blog Writer, Podcast Maker, YouTube Creator) can create a ResearchContext
and pass it to the Research Engine.
Author: ALwrity Team
Version: 2.0
"""
from enum import Enum
from typing import Optional, List, Dict, Any
from pydantic import BaseModel, Field
class ContentType(str, Enum):
    """Type of content being created - affects research focus."""
    BLOG = "blog"
    PODCAST = "podcast"
    VIDEO = "video"
    SOCIAL = "social"
    EMAIL = "email"
    NEWSLETTER = "newsletter"
    WHITEPAPER = "whitepaper"
    GENERAL = "general"  # default when the calling tool doesn't specify a type
class ResearchGoal(str, Enum):
    """Primary goal of the research - affects provider selection and depth."""
    FACTUAL = "factual"  # Stats, data, citations
    TRENDING = "trending"  # Current trends, news
    COMPETITIVE = "competitive"  # Competitor analysis
    EDUCATIONAL = "educational"  # How-to, explanations
    INSPIRATIONAL = "inspirational"  # Stories, quotes
    TECHNICAL = "technical"  # Deep technical content
class ResearchDepth(str, Enum):
    """Depth of research - maps to existing ResearchMode."""
    QUICK = "quick"  # Fast, surface-level (maps to BASIC)
    STANDARD = "standard"  # Balanced depth (maps to BASIC with more sources)
    COMPREHENSIVE = "comprehensive"  # Deep research (maps to COMPREHENSIVE)
    EXPERT = "expert"  # Maximum depth with expert sources
class ProviderPreference(str, Enum):
    """Provider preference - AUTO lets the engine decide."""
    AUTO = "auto"  # AI decides based on query (default)
    EXA = "exa"  # Force Exa neural search
    TAVILY = "tavily"  # Force Tavily AI search
    GOOGLE = "google"  # Force Google grounding
    HYBRID = "hybrid"  # Use multiple providers
class ResearchPersonalizationContext(BaseModel):
    """
    Context from the calling tool (Blog Writer, Podcast Maker, etc.)

    This personalizes the research without the Research Engine knowing
    the specific tool implementation. Every field has a default, so a tool
    may supply as much or as little context as it actually has.
    """
    # Who is creating the content
    creator_id: Optional[str] = None  # Clerk user ID
    # Content context
    content_type: ContentType = ContentType.GENERAL
    industry: Optional[str] = None
    target_audience: Optional[str] = None
    tone: Optional[str] = None  # professional, casual, technical, etc.
    # Persona data (from onboarding)
    persona_id: Optional[str] = None
    brand_voice: Optional[str] = None
    competitor_urls: List[str] = Field(default_factory=list)
    # Content requirements
    word_count_target: Optional[int] = None
    include_statistics: bool = True
    include_expert_quotes: bool = True
    include_case_studies: bool = False
    include_visuals: bool = False
    # Platform-specific hints
    platform: Optional[str] = None  # medium, wordpress, youtube, spotify, etc.

    class Config:
        # Serialize enum-typed fields by their string values.
        use_enum_values = True
class ResearchContext(BaseModel):
    """
    Main input schema for the Research Engine.

    Any tool hands one of these to the Research Engine to obtain research
    results; the engine uses AI to optimize parameters from this context.
    """
    # --- Primary research input ---
    query: str = Field(..., description="Main research query or topic")
    keywords: List[str] = Field(default_factory=list, description="Additional keywords")
    # --- Research configuration ---
    goal: ResearchGoal = ResearchGoal.FACTUAL
    depth: ResearchDepth = ResearchDepth.STANDARD
    provider_preference: ProviderPreference = ProviderPreference.AUTO
    # --- Personalization supplied by the calling tool ---
    personalization: Optional[ResearchPersonalizationContext] = None
    # --- Constraints ---
    max_sources: int = Field(default=10, ge=1, le=25)
    recency: Optional[str] = None  # "day", "week", "month", "year", None for all-time
    # --- Domain filtering ---
    include_domains: List[str] = Field(default_factory=list)
    exclude_domains: List[str] = Field(default_factory=list)
    # --- Advanced mode (exposes raw provider parameters) ---
    advanced_mode: bool = False
    # Raw provider parameters (honored only when advanced_mode=True)
    # Exa-specific
    exa_category: Optional[str] = None
    exa_search_type: Optional[str] = None  # auto, keyword, neural
    # Tavily-specific
    tavily_topic: Optional[str] = None  # general, news, finance
    tavily_search_depth: Optional[str] = None  # basic, advanced
    tavily_include_answer: bool = False
    tavily_include_raw_content: bool = False
    tavily_time_range: Optional[str] = None
    tavily_country: Optional[str] = None

    class Config:
        use_enum_values = True

    def get_effective_query(self) -> str:
        """Combine the base query with any extra keywords into one search string."""
        if not self.keywords:
            return self.query
        return " ".join([self.query, *self.keywords])

    def get_industry(self) -> str:
        """Industry from personalization; "General" when absent or blank."""
        persona = self.personalization
        return persona.industry if persona and persona.industry else "General"

    def get_audience(self) -> str:
        """Target audience from personalization; "General" when absent or blank."""
        persona = self.personalization
        return persona.target_audience if persona and persona.target_audience else "General"

    def get_user_id(self) -> Optional[str]:
        """Creator's user ID from personalization, or None when no context given."""
        persona = self.personalization
        return persona.creator_id if persona else None
class ResearchResult(BaseModel):
    """
    Output schema from the Research Engine.

    Standardized format that any tool can consume. Defaults describe a
    successful-but-empty result; error fields are populated on failure.
    """
    success: bool = True
    # Content
    summary: Optional[str] = None  # AI-generated summary of findings
    raw_content: Optional[str] = None  # Raw aggregated content for LLM processing
    # Sources
    sources: List[Dict[str, Any]] = Field(default_factory=list)
    # Analysis (reuses existing blog writer analysis)
    keyword_analysis: Dict[str, Any] = Field(default_factory=dict)
    competitor_analysis: Dict[str, Any] = Field(default_factory=dict)
    suggested_angles: List[str] = Field(default_factory=list)
    # Metadata
    provider_used: str = "google"  # Which provider was actually used
    search_queries: List[str] = Field(default_factory=list)
    grounding_metadata: Optional[Dict[str, Any]] = None
    # Cost tracking
    estimated_cost: float = 0.0
    # Error handling
    error_message: Optional[str] = None
    error_code: Optional[str] = None
    retry_suggested: bool = False  # hint to callers that a retry may succeed
    # Original context for reference
    original_query: Optional[str] = None

    class Config:
        # Serialize enum-typed fields by their string values.
        use_enum_values = True

View File

@@ -0,0 +1,558 @@
"""
Research Engine - Core Orchestrator
The main entry point for AI research across all ALwrity tools.
This engine wraps existing providers (Exa, Tavily, Google) and provides
a unified interface for any content generation tool.
Usage:
from services.research.core import ResearchEngine, ResearchContext, ContentType
engine = ResearchEngine()
result = await engine.research(ResearchContext(
query="AI trends in healthcare 2025",
content_type=ContentType.PODCAST,
personalization=ResearchPersonalizationContext(
industry="Healthcare",
target_audience="Medical professionals"
)
))
Author: ALwrity Team
Version: 2.0
"""
import os
import time
from typing import Dict, Any, Optional, Callable
from loguru import logger
from .research_context import (
ResearchContext,
ResearchResult,
ResearchDepth,
ContentType,
ResearchPersonalizationContext,
)
from .parameter_optimizer import ParameterOptimizer
# Reuse existing blog writer models and services
from models.blog_models import (
BlogResearchRequest,
BlogResearchResponse,
ResearchConfig,
ResearchProvider,
ResearchMode,
PersonaInfo,
ResearchSource,
)
# Research persona for personalization
from models.research_persona_models import ResearchPersona
class ResearchEngine:
"""
AI Research Engine - Standalone module for content research.
This engine:
1. Accepts a ResearchContext from any tool
2. Uses AI to optimize parameters for Exa/Tavily
3. Integrates research persona for personalization
4. Executes research using existing providers
5. Returns standardized ResearchResult
Can be imported by Blog Writer, Podcast Maker, YouTube Creator, etc.
"""
def __init__(self, db_session=None):
"""Initialize the Research Engine."""
self.optimizer = ParameterOptimizer()
self._providers_initialized = False
self._exa_provider = None
self._tavily_provider = None
self._google_provider = None
self._db_session = db_session
# Check provider availability
self.exa_available = bool(os.getenv("EXA_API_KEY"))
self.tavily_available = bool(os.getenv("TAVILY_API_KEY"))
logger.info(f"ResearchEngine initialized: exa={self.exa_available}, tavily={self.tavily_available}")
def _get_research_persona(self, user_id: str, generate_if_missing: bool = True) -> Optional[ResearchPersona]:
    """Fetch the user's research persona, optionally generating it on first use.

    Onboarding is mandatory and completes before any tool is reachable, so a
    core persona always exists and generation on first visit is expected to
    succeed (Phase 2 behavior).

    Args:
        user_id: Clerk user ID string.
        generate_if_missing: When True (default) trigger persona generation if
            nothing is cached; when False only serve the cache (fast path).

    Returns:
        ResearchPersona on success; None when user_id is empty, the user has
        no core persona, or the lookup fails.
    """
    if not user_id:
        return None
    try:
        from services.research.research_persona_service import ResearchPersonaService

        session = self._db_session
        if not session:
            from services.database import get_db_session
            session = get_db_session()
        service = ResearchPersonaService(db_session=session)

        if not generate_if_missing:
            # Fast path for config endpoints: cache only, never generate.
            cached = service.get_cached_only(user_id)
            if cached:
                logger.debug(f"Research persona loaded from cache for user {user_id}")
            return cached

        # Full path: may issue an LLM call when the persona is not cached.
        logger.info(f"🔄 Getting/generating research persona for user {user_id}...")
        persona = service.get_or_generate(user_id, force_refresh=False)
        if persona:
            logger.info(f"✅ Research persona ready for user {user_id}: industry={persona.default_industry}")
        else:
            logger.warning(f"⚠️ Could not get/generate research persona for user {user_id} - using core persona fallback")
        return persona
    except Exception as e:
        logger.warning(f"Failed to load research persona for user {user_id}: {e}")
        return None
def _enrich_context_with_persona(
    self,
    context: ResearchContext,
    persona: ResearchPersona
) -> ResearchContext:
    """Fill persona-derived defaults into a research context, in place.

    User-provided values always win: persona data is applied only where the
    context has no value (or the placeholder "General").
    """
    if not context.personalization:
        context.personalization = ResearchPersonalizationContext()
    p = context.personalization

    # Industry: persona default when unset or placeholder.
    if (not p.industry or p.industry == "General") and persona.default_industry:
        p.industry = persona.default_industry
        logger.debug(f"Applied persona industry: {persona.default_industry}")

    # Audience: same precedence rule as industry.
    if (not p.target_audience or p.target_audience == "General") and persona.default_target_audience:
        p.target_audience = persona.default_target_audience
        logger.debug(f"Applied persona target_audience: {persona.default_target_audience}")

    # Exa domain filter: persona suggestions, capped at 6 domains.
    if not context.include_domains and persona.suggested_exa_domains:
        context.include_domains = persona.suggested_exa_domains[:6]
        logger.debug(f"Applied persona domains: {context.include_domains}")

    # Exa category: persona suggestion when the caller gave none.
    if not context.exa_category and persona.suggested_exa_category:
        context.exa_category = persona.suggested_exa_category
        logger.debug(f"Applied persona exa_category: {persona.suggested_exa_category}")

    return context
async def research(
    self,
    context: ResearchContext,
    progress_callback: Optional[Callable[[str], None]] = None
) -> ResearchResult:
    """
    Execute research based on the given context.

    Pipeline: persona enrichment -> parameter optimization (provider choice)
    -> provider execution (Exa / Tavily / Google, with internal fallbacks)
    -> transform into a standardized ResearchResult. Never raises: failures
    are returned as a ResearchResult with success=False.

    Args:
        context: Research context with query, goals, and personalization
        progress_callback: Optional callback for progress updates

    Returns:
        ResearchResult with sources, analysis, and content
    """
    start_time = time.time()
    try:
        # Progress update
        self._progress(progress_callback, "🔍 Analyzing research query...")
        # Enrich context with research persona (Phase 2: generate if missing)
        user_id = context.get_user_id()
        if user_id:
            self._progress(progress_callback, "👤 Loading personalized research profile...")
            persona = self._get_research_persona(user_id, generate_if_missing=True)
            if persona:
                self._progress(progress_callback, "✨ Applying hyper-personalized settings...")
                context = self._enrich_context_with_persona(context, persona)
            else:
                logger.warning(f"No research persona available for user {user_id} - proceeding with provided context")
        # Optimize parameters based on enriched context (picks provider + config)
        provider, config = self.optimizer.optimize(context)
        self._progress(progress_callback, f"🤖 Selected {provider.value.upper()} for research")
        # Build the request using existing blog models
        request = self._build_request(context, config)
        # Normalize to "" so provider usage-tracking always gets a string.
        user_id = context.get_user_id() or ""
        # Execute research using appropriate provider
        self._progress(progress_callback, f"🌐 Connecting to {provider.value} search...")
        if provider == ResearchProvider.EXA:
            response = await self._execute_exa_research(request, config, user_id, progress_callback)
        elif provider == ResearchProvider.TAVILY:
            response = await self._execute_tavily_research(request, config, user_id, progress_callback)
        else:
            response = await self._execute_google_research(request, config, user_id, progress_callback)
        # Transform response to ResearchResult
        self._progress(progress_callback, "📊 Processing results...")
        result = self._transform_response(response, provider, context)
        duration_ms = (time.time() - start_time) * 1000
        logger.info(f"Research completed in {duration_ms:.0f}ms: {len(result.sources)} sources")
        self._progress(progress_callback, f"✅ Research complete: {len(result.sources)} sources found")
        return result
    except Exception as e:
        # Convert any failure into an error result so callers never have to
        # wrap this call in try/except.
        logger.error(f"Research failed: {e}")
        return ResearchResult(
            success=False,
            error_message=str(e),
            error_code="RESEARCH_FAILED",
            retry_suggested=True,
            original_query=context.query
        )
def _progress(self, callback: Optional[Callable[[str], None]], message: str) -> None:
    """Relay a progress message to the optional callback and the server log.

    NOTE(review): source indentation was ambiguous on whether the log line is
    guarded by the callback check; logging unconditionally is assumed here —
    confirm against the original file.
    """
    if callback is not None:
        callback(message)
    logger.info(f"[Research] {message}")
def _build_request(self, context: ResearchContext, config: ResearchConfig) -> BlogResearchRequest:
    """Translate a generic ResearchContext into the blog-writer request model.

    The engine reuses the blog writer's provider stack, so every tool's
    context is funneled through BlogResearchRequest.
    """
    # Fall back to the raw query when no explicit keywords were supplied.
    keywords = context.keywords or [context.query]

    p = context.personalization
    # Mirror personalization into the blog PersonaInfo model when present.
    persona_info = None
    if p:
        persona_info = PersonaInfo(
            persona_id=p.persona_id,
            tone=p.tone,
            audience=p.target_audience,
            industry=p.industry,
        )
    tone = p.tone if p else None
    word_count = p.word_count_target if p else 1500

    return BlogResearchRequest(
        keywords=keywords,
        topic=context.query,
        industry=context.get_industry(),
        target_audience=context.get_audience(),
        tone=tone,
        word_count_target=word_count,
        persona=persona_info,
        research_mode=config.mode,
        config=config,
    )
async def _execute_exa_research(
    self,
    request: BlogResearchRequest,
    config: ResearchConfig,
    user_id: str,
    progress_callback: Optional[Callable[[str], None]] = None
) -> BlogResearchResponse:
    """Execute research using Exa provider.

    Builds a mode-specific prompt, runs the Exa neural search, records usage
    cost, then delegates to _run_analysis. Falls back to Tavily when the Exa
    API key is not configured; any other error propagates to research().
    """
    from services.blog_writer.research.exa_provider import ExaResearchProvider
    from services.blog_writer.research.research_strategies import get_strategy_for_mode
    self._progress(progress_callback, "🔍 Executing Exa neural search...")
    # Get strategy for building prompt
    strategy = get_strategy_for_mode(config.mode)
    topic = request.topic or ", ".join(request.keywords)
    industry = request.industry or "General"
    target_audience = request.target_audience or "General"
    research_prompt = strategy.build_research_prompt(topic, industry, target_audience, config)
    # Execute Exa search
    try:
        exa_provider = ExaResearchProvider()
        raw_result = await exa_provider.search(
            research_prompt, topic, industry, target_audience, config, user_id
        )
        # Track usage; default to $0.005 when the provider omits cost details.
        cost = raw_result.get('cost', {}).get('total', 0.005) if isinstance(raw_result.get('cost'), dict) else 0.005
        exa_provider.track_exa_usage(user_id, cost)
        self._progress(progress_callback, f"📝 Found {len(raw_result.get('sources', []))} sources")
        # Run common analysis
        return await self._run_analysis(request, raw_result, config, user_id, progress_callback)
    except RuntimeError as e:
        # Only the missing-key error falls back; other RuntimeErrors re-raise.
        if "EXA_API_KEY not configured" in str(e):
            logger.warning("Exa not configured, falling back to Tavily")
            self._progress(progress_callback, "⚠️ Exa unavailable, trying Tavily...")
            config.provider = ResearchProvider.TAVILY
            return await self._execute_tavily_research(request, config, user_id, progress_callback)
        raise
async def _execute_tavily_research(
    self,
    request: BlogResearchRequest,
    config: ResearchConfig,
    user_id: str,
    progress_callback: Optional[Callable[[str], None]] = None
) -> BlogResearchResponse:
    """Execute research using Tavily provider.

    Mirrors _execute_exa_research: prompt build, search, usage tracking, then
    common analysis. Falls back to Google grounding when the Tavily API key
    is not configured; other errors propagate.
    """
    from services.blog_writer.research.tavily_provider import TavilyResearchProvider
    from services.blog_writer.research.research_strategies import get_strategy_for_mode
    self._progress(progress_callback, "🔍 Executing Tavily AI search...")
    # Get strategy for building prompt
    strategy = get_strategy_for_mode(config.mode)
    topic = request.topic or ", ".join(request.keywords)
    industry = request.industry or "General"
    target_audience = request.target_audience or "General"
    research_prompt = strategy.build_research_prompt(topic, industry, target_audience, config)
    # Execute Tavily search
    try:
        tavily_provider = TavilyResearchProvider()
        raw_result = await tavily_provider.search(
            research_prompt, topic, industry, target_audience, config, user_id
        )
        # Track usage; default to $0.001 when the provider omits cost details.
        cost = raw_result.get('cost', {}).get('total', 0.001) if isinstance(raw_result.get('cost'), dict) else 0.001
        search_depth = config.tavily_search_depth or "basic"
        tavily_provider.track_tavily_usage(user_id, cost, search_depth)
        self._progress(progress_callback, f"📝 Found {len(raw_result.get('sources', []))} sources")
        # Run common analysis
        return await self._run_analysis(request, raw_result, config, user_id, progress_callback)
    except RuntimeError as e:
        # Only the missing-key error falls back; other RuntimeErrors re-raise.
        if "TAVILY_API_KEY not configured" in str(e):
            logger.warning("Tavily not configured, falling back to Google")
            self._progress(progress_callback, "⚠️ Tavily unavailable, using Google Search...")
            config.provider = ResearchProvider.GOOGLE
            return await self._execute_google_research(request, config, user_id, progress_callback)
        raise
async def _execute_google_research(
    self,
    request: BlogResearchRequest,
    config: ResearchConfig,
    user_id: str,
    progress_callback: Optional[Callable[[str], None]] = None
) -> BlogResearchResponse:
    """Execute research using Google/Gemini grounding.

    Last link in the provider fallback chain: no API-key fallback of its own,
    so errors propagate to research(). Results carry grounding metadata,
    hence the is_google=True flag passed to _run_analysis.
    """
    from services.blog_writer.research.google_provider import GoogleResearchProvider
    from services.blog_writer.research.research_strategies import get_strategy_for_mode
    self._progress(progress_callback, "🔍 Executing Google Search grounding...")
    # Get strategy for building prompt
    strategy = get_strategy_for_mode(config.mode)
    topic = request.topic or ", ".join(request.keywords)
    industry = request.industry or "General"
    target_audience = request.target_audience or "General"
    research_prompt = strategy.build_research_prompt(topic, industry, target_audience, config)
    # Execute Google search
    google_provider = GoogleResearchProvider()
    raw_result = await google_provider.search(
        research_prompt, topic, industry, target_audience, config, user_id
    )
    self._progress(progress_callback, "📝 Processing grounded results...")
    # Run common analysis (Google results need grounding-specific extraction)
    return await self._run_analysis(request, raw_result, config, user_id, progress_callback, is_google=True)
async def _run_analysis(
    self,
    request: BlogResearchRequest,
    raw_result: Dict[str, Any],
    config: ResearchConfig,
    user_id: str,
    progress_callback: Optional[Callable[[str], None]] = None,
    is_google: bool = False
) -> BlogResearchResponse:
    """Run common analysis on raw results.

    Normalizes provider output (Google grounding vs Exa/Tavily dicts), runs
    keyword/competitor/angle analyzers, assembles a BlogResearchResponse,
    and filters it before returning.

    Args:
        request: The normalized research request (topic, keywords, industry).
        raw_result: Provider-specific raw payload.
        config: Active research configuration.
        user_id: User ID forwarded to analyzers for usage attribution.
        progress_callback: Optional progress reporter.
        is_google: True when raw_result came from Gemini grounding, which
            needs dedicated source/metadata extraction.
    """
    from services.blog_writer.research.keyword_analyzer import KeywordAnalyzer
    from services.blog_writer.research.competitor_analyzer import CompetitorAnalyzer
    from services.blog_writer.research.content_angle_generator import ContentAngleGenerator
    from services.blog_writer.research.data_filter import ResearchDataFilter
    self._progress(progress_callback, "🔍 Analyzing keywords and content angles...")
    # Extract content for analysis (shape differs per provider family)
    if is_google:
        content = raw_result.get("content", "")
        sources = self._extract_sources_from_grounding(raw_result)
        search_queries = raw_result.get("search_queries", []) or []
        grounding_metadata = self._extract_grounding_metadata(raw_result)
    else:
        content = raw_result.get('content', '')
        # Provider may return plain dicts or ResearchSource models; coerce dicts.
        sources = [ResearchSource(**s) if isinstance(s, dict) else s for s in raw_result.get('sources', [])]
        search_queries = raw_result.get('search_queries', [])
        grounding_metadata = None
    topic = request.topic or ", ".join(request.keywords)
    industry = request.industry or "General"
    # Run analyzers
    keyword_analyzer = KeywordAnalyzer()
    competitor_analyzer = CompetitorAnalyzer()
    content_angle_generator = ContentAngleGenerator()
    data_filter = ResearchDataFilter()
    keyword_analysis = keyword_analyzer.analyze(content, request.keywords, user_id=user_id)
    competitor_analysis = competitor_analyzer.analyze(content, user_id=user_id)
    suggested_angles = content_angle_generator.generate(content, topic, industry, user_id=user_id)
    # Build response
    response = BlogResearchResponse(
        success=True,
        sources=sources,
        keyword_analysis=keyword_analysis,
        competitor_analysis=competitor_analysis,
        suggested_angles=suggested_angles,
        search_widget="",
        search_queries=search_queries,
        grounding_metadata=grounding_metadata,
        original_keywords=request.keywords,
    )
    # Filter and clean research data before handing back to the caller
    self._progress(progress_callback, "✨ Filtering and optimizing results...")
    filtered_response = data_filter.filter_research_data(response)
    return filtered_response
def _extract_sources_from_grounding(self, gemini_result: Dict[str, Any]) -> list:
    """Convert Gemini grounding source dicts into ResearchSource models.

    Returns an empty list for missing/malformed input. Missing fields get
    conservative defaults (score 0.8, placeholder date, "web" type).
    """
    from models.blog_models import ResearchSource

    if not isinstance(gemini_result, dict) or not gemini_result:
        return []

    converted = []
    for entry in gemini_result.get("sources", []) or []:
        body = entry.get("content", "")
        # Prefer a truncated content excerpt; otherwise synthesize one from the title.
        excerpt = body[:500] if body else f"Source from {entry.get('title', 'web')}"
        converted.append(ResearchSource(
            title=entry.get("title", "Untitled"),
            url=entry.get("url", ""),
            excerpt=excerpt,
            credibility_score=float(entry.get("credibility_score", 0.8)),
            published_at=str(entry.get("publication_date", "2024-01-01")),
            index=entry.get("index"),
            source_type=entry.get("type", "web"),
        ))
    return converted
def _extract_grounding_metadata(self, gemini_result: Dict[str, Any]) -> Optional[Dict[str, Any]]:
"""Extract grounding metadata from Gemini result."""
if not gemini_result or not isinstance(gemini_result, dict):
return None
return gemini_result.get("grounding_metadata")
def _transform_response(
    self,
    response: BlogResearchResponse,
    provider: ResearchProvider,
    context: ResearchContext
) -> ResearchResult:
    """Transform BlogResearchResponse into the engine's ResearchResult.

    Sources and grounding metadata are normalized to plain dicts so callers
    outside the blog writer never depend on its pydantic models.
    """
    # Convert sources to dicts (pydantic model, dict, or arbitrary object)
    sources = []
    for s in response.sources:
        if hasattr(s, 'dict'):
            sources.append(s.dict())
        elif isinstance(s, dict):
            sources.append(s)
        else:
            # Last resort: pull the three core fields off the object.
            sources.append({
                'title': getattr(s, 'title', ''),
                'url': getattr(s, 'url', ''),
                'excerpt': getattr(s, 'excerpt', ''),
            })
    # Extract grounding metadata (model -> dict, dict passed through)
    grounding = None
    if response.grounding_metadata:
        if hasattr(response.grounding_metadata, 'dict'):
            grounding = response.grounding_metadata.dict()
        else:
            grounding = response.grounding_metadata
    return ResearchResult(
        success=response.success,
        sources=sources,
        keyword_analysis=response.keyword_analysis,
        competitor_analysis=response.competitor_analysis,
        suggested_angles=response.suggested_angles,
        provider_used=provider.value,
        search_queries=response.search_queries,
        grounding_metadata=grounding,
        original_query=context.query,
        error_message=response.error_message,
        # These fields are optional on older response models; guard with hasattr.
        error_code=response.error_code if hasattr(response, 'error_code') else None,
        retry_suggested=response.retry_suggested if hasattr(response, 'retry_suggested') else False,
    )
def get_provider_status(self) -> Dict[str, Any]:
"""Get status of available providers."""
return {
"exa": {
"available": self.exa_available,
"priority": 1,
"description": "Neural search for semantic understanding"
},
"tavily": {
"available": self.tavily_available,
"priority": 2,
"description": "AI-powered web search"
},
"google": {
"available": True, # Always available via Gemini
"priority": 3,
"description": "Google Search grounding"
}
}

View File

@@ -0,0 +1,794 @@
"""
Exa API Service for ALwrity
This service provides competitor discovery and analysis using the Exa API,
which uses neural search to find semantically similar websites and content.
Key Features:
- Competitor discovery using neural search
- Content analysis and summarization
- Competitive intelligence gathering
- Cost-effective API usage with caching
- Integration with onboarding Step 3
Dependencies:
- aiohttp (for async HTTP requests)
- os (for environment variables)
- logging (for debugging)
Author: ALwrity Team
Version: 1.0
Last Updated: January 2025
"""
import os
import json
import asyncio
from typing import Dict, List, Optional, Any, Union
from datetime import datetime, timedelta
from loguru import logger
from urllib.parse import urlparse
from exa_py import Exa
class ExaService:
    """
    Service for competitor discovery and analysis using the Exa API.

    This service provides neural search capabilities to find semantically similar
    websites and analyze their content for competitive intelligence.
    """

    def __init__(self):
        """Initialize the Exa Service with API credentials."""
        # self.api_key: raw EXA_API_KEY value (may be None until injected).
        # self.exa: Exa SDK client, created lazily by _try_initialize().
        # self.enabled: True only when a client was built successfully.
        self.api_key = os.getenv("EXA_API_KEY")
        self.exa = None
        self.enabled = False
        # Don't assume key is available at import time in production.
        # Keys may be injected per-request via middleware, so defer init.
        self._try_initialize()

    def _try_initialize(self) -> None:
        """Attempt to (re)initialize the Exa SDK from current environment.

        Idempotent: safe to call before every request so per-request key
        injection is picked up. Leaves the service disabled on any failure.
        """
        if self.enabled and self.exa:
            return
        try:
            # Re-read the key each attempt; middleware may have set it since.
            self.api_key = os.getenv("EXA_API_KEY")
            if not self.api_key:
                # Leave disabled; caller may try again after middleware injection
                logger.warning("EXA_API_KEY not configured; Exa service will be disabled")
                self.enabled = False
                self.exa = None
                return
            self.exa = Exa(api_key=self.api_key)
            self.enabled = True
            logger.info("Exa Service initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Exa service: {e}")
            self.enabled = False
            self.exa = None
async def discover_competitors(
    self,
    user_url: str,
    num_results: int = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    industry_context: Optional[str] = None,
    website_analysis_data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Discover competitors for a given website using Exa's neural search.

    Args:
        user_url: The website URL to find competitors for
        num_results: Number of competitor results to return (capped at 10,
            the Exa API limit applied below)
        include_domains: List of domains to include in search
        exclude_domains: List of domains to exclude from search (never mutated)
        industry_context: Industry context for better competitor discovery
        website_analysis_data: Optional website analysis used to sharpen
            the include_text and summary queries

    Returns:
        Dictionary containing competitor analysis results on success, or a
        {"success": False, "error", "details"} payload on failure.
    """
    try:
        # Ensure we pick up any per-request injected key
        self._try_initialize()
        if not self.enabled:
            raise ValueError("Exa Service is not enabled - API key missing")
        logger.info(f"Starting competitor discovery for: {user_url}")
        # Extract user domain so the user's own site is never returned.
        user_domain = urlparse(user_url).netloc
        # BUGFIX: copy the caller's list instead of appending in place —
        # the previous code mutated the `exclude_domains` argument, growing
        # it by one domain on every call.
        exclude_domains_list = list(exclude_domains) if exclude_domains else []
        exclude_domains_list.append(user_domain)
        logger.info(f"Excluding domains: {exclude_domains_list}")
        # Extract insights from website analysis for better targeting
        include_text_queries = []
        summary_query = f"Business model, target audience, content strategy{f' in {industry_context}' if industry_context else ''}"
        if website_analysis_data:
            analysis = website_analysis_data.get('analysis', {})
            # Extract key business terms from the analysis
            if 'target_audience' in analysis:
                audience = analysis['target_audience']
                if isinstance(audience, dict) and 'primary_audience' in audience:
                    primary_audience = audience['primary_audience']
                    if len(primary_audience.split()) <= 5:  # Exa limit: max 5 words per query
                        include_text_queries.append(primary_audience)
            # Use industry context from analysis
            if 'industry' in analysis and analysis['industry']:
                industry = analysis['industry']
                if len(industry.split()) <= 5:
                    include_text_queries.append(industry)
            # Enhance summary query with analysis insights
            if 'content_type' in analysis:
                content_type = analysis['content_type']
                summary_query += f", {content_type} content strategy"
            logger.info(f"Enhanced targeting with analysis data: {include_text_queries}")
        # Use the Exa SDK to find similar links with content and context
        search_result = self.exa.find_similar_and_contents(
            url=user_url,
            num_results=min(num_results, 10),  # Exa API limit
            include_domains=include_domains,
            exclude_domains=exclude_domains_list,
            include_text=include_text_queries if include_text_queries else None,
            text=True,
            highlights={
                "numSentences": 2,
                "highlightsPerUrl": 3,
                "query": "Unique value proposition, competitive advantages, market position"
            },
            summary={
                "query": summary_query
            }
        )
        # TODO: Add context generation once the SDK supports it.
        # Log the raw Exa API response summary (avoiding verbose markdown content)
        logger.info(f"📊 Exa API response for {user_url}:")
        logger.info(f" ├─ Request ID: {getattr(search_result, 'request_id', 'N/A')}")
        logger.info(f" ├─ Results count: {len(getattr(search_result, 'results', []))}")
        logger.info(f" └─ Cost: ${getattr(getattr(search_result, 'cost_dollars', None), 'total', 0)}")
        # Extract results from search
        results = getattr(search_result, 'results', [])
        logger.info(f" - Found {len(results)} competitors")
        # Process and structure the results
        competitors = self._process_competitor_results(search_result, user_url)
        logger.info(f"Successfully discovered {len(competitors)} competitors for {user_url}")
        return {
            "success": True,
            "user_url": user_url,
            "competitors": competitors,
            "total_competitors": len(competitors),
            "analysis_timestamp": datetime.utcnow().isoformat(),
            "industry_context": industry_context,
            "api_cost": getattr(getattr(search_result, 'cost_dollars', None), 'total', 0) if hasattr(search_result, 'cost_dollars') and getattr(search_result, 'cost_dollars', None) else 0,
            "request_id": getattr(search_result, 'request_id', None) if hasattr(search_result, 'request_id') else None
        }
    except asyncio.TimeoutError:
        logger.error("Exa API request timed out")
        return {
            "success": False,
            "error": "Request timed out",
            "details": "The competitor discovery request took too long to complete"
        }
    except Exception as e:
        logger.error(f"Error in competitor discovery: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "details": "An unexpected error occurred during competitor discovery"
        }
def _process_competitor_results(self, search_result, user_url: str) -> List[Dict[str, Any]]:
    """
    Process and structure the Exa SDK response into competitor data.

    Skips results on the user's own domain, enriches each with a relevance
    score, extracted insights, and quality metrics, and returns the list
    sorted by relevance (highest first). Per-result failures are logged and
    skipped rather than aborting the whole batch.

    Args:
        search_result: Response from Exa SDK
        user_url: Original user URL for reference

    Returns:
        List of processed competitor data dicts
    """
    competitors = []
    user_domain = urlparse(user_url).netloc
    # Extract results from the SDK response
    results = getattr(search_result, 'results', [])
    for result in results:
        try:
            # Extract basic information from the result object
            competitor_url = getattr(result, 'url', '')
            competitor_domain = urlparse(competitor_url).netloc
            # Skip if it's the same domain as the user
            if competitor_domain == user_domain:
                continue
            # Extract content insights
            summary = getattr(result, 'summary', '')
            highlights = getattr(result, 'highlights', [])
            highlight_scores = getattr(result, 'highlight_scores', [])
            # Calculate competitive relevance score
            relevance_score = self._calculate_relevance_score(result, user_url)
            competitor_data = {
                "url": competitor_url,
                "domain": competitor_domain,
                "title": getattr(result, 'title', ''),
                "published_date": getattr(result, 'published_date', None),
                "author": getattr(result, 'author', None),
                "favicon": getattr(result, 'favicon', None),
                "image": getattr(result, 'image', None),
                "summary": summary,
                "highlights": highlights,
                "highlight_scores": highlight_scores,
                "relevance_score": relevance_score,
                "competitive_insights": self._extract_competitive_insights(summary, highlights),
                "content_analysis": self._analyze_content_quality(result)
            }
            competitors.append(competitor_data)
        except Exception as e:
            # Best-effort: one malformed result must not drop the others.
            logger.warning(f"Error processing competitor result: {str(e)}")
            continue
    # Sort by relevance score (highest first)
    competitors.sort(key=lambda x: x["relevance_score"], reverse=True)
    return competitors
def _calculate_relevance_score(self, result, user_url: str) -> float:
"""
Calculate a relevance score for competitor ranking.
Args:
result: Competitor result from Exa SDK
user_url: Original user URL
Returns:
Relevance score between 0 and 1
"""
score = 0.0
# Base score from highlight scores
highlight_scores = getattr(result, 'highlight_scores', [])
if highlight_scores:
score += sum(highlight_scores) / len(highlight_scores) * 0.4
# Score from summary quality
summary = getattr(result, 'summary', '')
if summary and len(summary) > 100:
score += 0.3
# Score from title relevance
title = getattr(result, 'title', '').lower()
if any(keyword in title for keyword in ["business", "company", "service", "solution", "platform"]):
score += 0.2
# Score from URL structure similarity
competitor_url = getattr(result, 'url', '')
if self._url_structure_similarity(user_url, competitor_url) > 0.5:
score += 0.1
return min(score, 1.0)
def _url_structure_similarity(self, url1: str, url2: str) -> float:
"""
Calculate URL structure similarity.
Args:
url1: First URL
url2: Second URL
Returns:
Similarity score between 0 and 1
"""
try:
parsed1 = urlparse(url1)
parsed2 = urlparse(url2)
# Compare path structure
path1_parts = [part for part in parsed1.path.split('/') if part]
path2_parts = [part for part in parsed2.path.split('/') if part]
if not path1_parts or not path2_parts:
return 0.0
# Calculate similarity based on path length and structure
max_parts = max(len(path1_parts), len(path2_parts))
common_parts = sum(1 for p1, p2 in zip(path1_parts, path2_parts) if p1 == p2)
return common_parts / max_parts
except Exception:
return 0.0
def _extract_competitive_insights(self, summary: str, highlights: List[str]) -> Dict[str, Any]:
"""
Extract competitive insights from summary and highlights.
Args:
summary: Content summary
highlights: Content highlights
Returns:
Dictionary of competitive insights
"""
insights = {
"business_model": "",
"target_audience": "",
"value_proposition": "",
"competitive_advantages": [],
"content_strategy": ""
}
# Combine summary and highlights for analysis
content = f"{summary} {' '.join(highlights)}".lower()
# Extract business model indicators
business_models = ["saas", "platform", "service", "product", "consulting", "agency", "marketplace"]
for model in business_models:
if model in content:
insights["business_model"] = model.title()
break
# Extract target audience indicators
audiences = ["enterprise", "small business", "startups", "developers", "marketers", "consumers"]
for audience in audiences:
if audience in content:
insights["target_audience"] = audience.title()
break
# Extract value proposition from highlights
if highlights:
insights["value_proposition"] = highlights[0][:100] + "..." if len(highlights[0]) > 100 else highlights[0]
return insights
def _analyze_content_quality(self, result) -> Dict[str, Any]:
"""
Analyze the content quality of a competitor.
Args:
result: Competitor result from Exa SDK
Returns:
Dictionary of content quality metrics
"""
quality_metrics = {
"content_depth": "medium",
"technical_sophistication": "medium",
"content_freshness": "unknown",
"engagement_potential": "medium"
}
# Analyze content depth from summary length
summary = getattr(result, 'summary', '')
if len(summary) > 300:
quality_metrics["content_depth"] = "high"
elif len(summary) < 100:
quality_metrics["content_depth"] = "low"
# Analyze technical sophistication
technical_keywords = ["api", "integration", "automation", "analytics", "data", "platform"]
highlights = getattr(result, 'highlights', [])
content_text = f"{summary} {' '.join(highlights)}".lower()
technical_count = sum(1 for keyword in technical_keywords if keyword in content_text)
if technical_count >= 3:
quality_metrics["technical_sophistication"] = "high"
elif technical_count == 0:
quality_metrics["technical_sophistication"] = "low"
return quality_metrics
async def discover_social_media_accounts(self, user_url: str) -> Dict[str, Any]:
"""
Discover social media accounts for a given website using Exa's answer API.
Args:
user_url: The website URL to find social media accounts for
Returns:
Dictionary containing social media discovery results
"""
try:
# Ensure we pick up any per-request injected key
self._try_initialize()
if not self.enabled:
raise ValueError("Exa Service is not enabled - API key missing")
logger.info(f"Starting social media discovery for: {user_url}")
# Extract domain from URL for better targeting
domain = urlparse(user_url).netloc.replace('www.', '')
# Use Exa's answer API to find social media accounts
result = self.exa.answer(
f"Find all social media accounts of the url: {domain}. Return a JSON object with facebook, twitter, instagram, linkedin, youtube, and tiktok fields containing the URLs or empty strings if not found.",
model="exa-pro",
text=True
)
# Log the raw Exa API response for debugging
logger.info(f"Raw Exa social media response for {user_url}:")
logger.info(f" - Request ID: {getattr(result, 'request_id', 'N/A')}")
logger.info(f" └─ Cost: ${getattr(getattr(result, 'cost_dollars', None), 'total', 0)}")
# Note: Full raw response contains verbose content - logging only summary
# To see full response, set EXA_DEBUG=true in environment
# Extract social media data
answer_text = getattr(result, 'answer', '')
citations = getattr(result, 'citations', [])
# Convert AnswerResult objects to dictionaries for JSON serialization
citations_dicts = []
for citation in citations:
if hasattr(citation, '__dict__'):
# Convert object to dictionary
citation_dict = {
'id': getattr(citation, 'id', ''),
'title': getattr(citation, 'title', ''),
'url': getattr(citation, 'url', ''),
'text': getattr(citation, 'text', ''),
'snippet': getattr(citation, 'snippet', ''),
'published_date': getattr(citation, 'published_date', None),
'author': getattr(citation, 'author', None),
'image': getattr(citation, 'image', None),
'favicon': getattr(citation, 'favicon', None)
}
citations_dicts.append(citation_dict)
else:
# If it's already a dict, use as is
citations_dicts.append(citation)
logger.info(f" - Raw answer text: {answer_text}")
logger.info(f" - Citations count: {len(citations_dicts)}")
# Parse the response from the answer (could be JSON or markdown format)
try:
import json
import re
if answer_text.strip().startswith('{'):
# Direct JSON format
answer_data = json.loads(answer_text.strip())
else:
# Parse markdown format with URLs
answer_data = {
"facebook": "",
"twitter": "",
"instagram": "",
"linkedin": "",
"youtube": "",
"tiktok": ""
}
# Extract URLs using regex patterns
facebook_match = re.search(r'Facebook.*?\[([^\]]+)\]', answer_text)
if facebook_match:
answer_data["facebook"] = facebook_match.group(1)
twitter_match = re.search(r'Twitter.*?\[([^\]]+)\]', answer_text)
if twitter_match:
answer_data["twitter"] = twitter_match.group(1)
instagram_match = re.search(r'Instagram.*?\[([^\]]+)\]', answer_text)
if instagram_match:
answer_data["instagram"] = instagram_match.group(1)
linkedin_match = re.search(r'LinkedIn.*?\[([^\]]+)\]', answer_text)
if linkedin_match:
answer_data["linkedin"] = linkedin_match.group(1)
youtube_match = re.search(r'YouTube.*?\[([^\]]+)\]', answer_text)
if youtube_match:
answer_data["youtube"] = youtube_match.group(1)
tiktok_match = re.search(r'TikTok.*?\[([^\]]+)\]', answer_text)
if tiktok_match:
answer_data["tiktok"] = tiktok_match.group(1)
except (json.JSONDecodeError, AttributeError, KeyError):
# If parsing fails, create empty structure
answer_data = {
"facebook": "",
"twitter": "",
"instagram": "",
"linkedin": "",
"youtube": "",
"tiktok": ""
}
logger.info(f" - Parsed social media accounts:")
for platform, url in answer_data.items():
if url:
logger.info(f" {platform}: {url}")
return {
"success": True,
"user_url": user_url,
"social_media_accounts": answer_data,
"citations": citations_dicts,
"analysis_timestamp": datetime.utcnow().isoformat(),
"api_cost": getattr(getattr(result, 'cost_dollars', None), 'total', 0) if hasattr(result, 'cost_dollars') and getattr(result, 'cost_dollars', None) else 0,
"request_id": getattr(result, 'request_id', None) if hasattr(result, 'request_id') else None
}
except Exception as e:
logger.error(f"Error in social media discovery: {str(e)}")
return {
"success": False,
"error": str(e),
"details": "An unexpected error occurred during social media discovery"
}
def _generate_basic_context(self, results: List[Any], user_url: str) -> str:
"""
Generate a basic context string from competitor results for LLM consumption.
Args:
results: List of competitor results from Exa API
user_url: Original user URL for reference
Returns:
Formatted context string
"""
context_parts = [
f"Competitive Analysis for: {user_url}",
f"Found {len(results)} similar websites/competitors:",
""
]
for i, result in enumerate(results[:5], 1): # Limit to top 5 for context
url = getattr(result, 'url', 'Unknown URL')
title = getattr(result, 'title', 'Unknown Title')
summary = getattr(result, 'summary', 'No summary available')
context_parts.extend([
f"{i}. {title}",
f" URL: {url}",
f" Summary: {summary[:200]}{'...' if len(summary) > 200 else ''}",
""
])
context_parts.append("Key insights:")
context_parts.append("- These competitors offer similar services or content")
context_parts.append("- Analyze their content strategy and positioning")
context_parts.append("- Identify opportunities for differentiation")
return "\n".join(context_parts)
async def analyze_competitor_content(
    self,
    competitor_url: str,
    analysis_depth: str = "standard"
) -> Dict[str, Any]:
    """Run a deeper, single-competitor analysis built on top of discovery.

    Args:
        competitor_url: URL of the competitor to analyze.
        analysis_depth: Depth label ("quick", "standard", "deep"); echoed in
            the result, not used to change behavior here.

    Returns:
        Dict with content patterns and competitive insights on success, or an
        error payload with "success": False.
    """
    try:
        logger.info(f"Starting detailed analysis for competitor: {competitor_url}")
        # Restrict discovery to the competitor's own domain so the sampled
        # pages come from that competitor rather than the wider web.
        discovery = await self.discover_competitors(
            competitor_url,
            num_results=10,
            include_domains=[urlparse(competitor_url).netloc]
        )
        if not discovery["success"]:
            return discovery
        patterns = self._analyze_content_patterns(discovery["competitors"])
        insights = self._generate_competitive_insights(
            competitor_url,
            discovery["competitors"],
            patterns
        )
        return {
            "success": True,
            "competitor_url": competitor_url,
            "content_patterns": patterns,
            "competitive_insights": insights,
            "analysis_timestamp": datetime.utcnow().isoformat(),
            "analysis_depth": analysis_depth
        }
    except Exception as e:
        logger.error(f"Error in competitor content analysis: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "details": "An unexpected error occurred during competitor analysis"
        }
def _analyze_content_patterns(self, competitors: List[Dict[str, Any]]) -> Dict[str, Any]:
"""
Analyze content patterns across competitors.
Args:
competitors: List of competitor data
Returns:
Dictionary of content patterns
"""
patterns = {
"common_themes": [],
"content_types": [],
"publishing_patterns": {},
"target_keywords": [],
"content_strategies": []
}
# Analyze common themes
all_summaries = [comp.get("summary", "") for comp in competitors]
# This would be enhanced with NLP analysis in a full implementation
# Analyze content types from URLs
content_types = set()
for comp in competitors:
url = comp.get("url", "")
if "/blog/" in url:
content_types.add("blog")
elif "/product/" in url or "/service/" in url:
content_types.add("product")
elif "/about/" in url:
content_types.add("about")
elif "/contact/" in url:
content_types.add("contact")
patterns["content_types"] = list(content_types)
return patterns
def _generate_competitive_insights(
self,
competitor_url: str,
competitors: List[Dict[str, Any]],
content_patterns: Dict[str, Any]
) -> Dict[str, Any]:
"""
Generate competitive insights from analysis data.
Args:
competitor_url: URL of the competitor
competitors: List of competitor data
content_patterns: Content pattern analysis
Returns:
Dictionary of competitive insights
"""
insights = {
"competitive_strengths": [],
"content_opportunities": [],
"market_positioning": "unknown",
"strategic_recommendations": []
}
# Analyze competitive strengths
for comp in competitors:
if comp.get("relevance_score", 0) > 0.7:
insights["competitive_strengths"].append({
"strength": comp.get("summary", "")[:100],
"relevance": comp.get("relevance_score", 0)
})
# Generate content opportunities
if content_patterns.get("content_types"):
insights["content_opportunities"] = [
f"Develop {content_type} content"
for content_type in content_patterns["content_types"]
]
return insights
def health_check(self) -> Dict[str, Any]:
    """Report the operational status of the Exa service.

    Returns:
        Dict with "status" ("disabled" / "healthy" / "error"), a message and
        an ISO timestamp.
    """
    try:
        # Re-read environment first so a freshly configured key is picked up.
        self._try_initialize()
        if not self.enabled:
            return {
                "status": "disabled",
                "message": "Exa API key not configured",
                "timestamp": datetime.utcnow().isoformat()
            }
        # Any exception from this minimal SDK call means the API is unusable;
        # reaching the return below means it is operational.
        self.exa.find_similar(
            url="https://example.com",
            num_results=1
        )
        return {
            "status": "healthy",
            "message": "Exa API is operational",
            "timestamp": datetime.utcnow().isoformat(),
            "test_successful": True
        }
    except Exception as e:
        return {
            "status": "error",
            "message": f"Health check failed: {str(e)}",
            "timestamp": datetime.utcnow().isoformat()
        }
def get_cost_estimate(self, num_results: int, include_content: bool = True) -> Dict[str, Any]:
    """Estimate Exa API cost (USD) for a request.

    Args:
        num_results: Number of results requested.
        include_content: Whether content analysis is included.

    Returns:
        Dict breaking down search, content and total estimated cost.
    """
    # Tiered search pricing per Exa's published rates.
    if num_results <= 25:
        search_fee = 0.005
    elif num_results <= 100:
        search_fee = 0.025
    else:
        search_fee = 1.0
    # Rough per-result estimate for content analysis, when requested.
    content_fee = num_results * 0.001 if include_content else 0.0
    return {
        "search_cost": search_fee,
        "content_cost": content_fee,
        "total_estimated_cost": search_fee + content_fee,
        "num_results": num_results,
        "include_content": include_content,
    }

View File

@@ -0,0 +1,497 @@
"""
Google Search Service for ALwrity
This service provides real-time industry research using Google Custom Search API,
replacing the mock research system with actual web search capabilities.
Key Features:
- Industry-specific search queries
- Source credibility scoring and ranking
- Content extraction and insight generation
- Real-time information from the last month
- Fallback mechanisms for API failures
Dependencies:
- google-api-python-client
- aiohttp (for async HTTP requests)
- os (for environment variables)
- logging (for debugging)
Author: ALwrity Team
Version: 1.0
Last Updated: January 2025
"""
import os
import json
import asyncio
import aiohttp
from typing import Dict, List, Optional, Any
from datetime import datetime, timedelta
from loguru import logger
class GoogleSearchService:
"""
Service for conducting real industry research using Google Custom Search API.
This service replaces the mock research system with actual web search capabilities,
providing current, relevant industry information for content grounding.
"""
def __init__(self):
"""Initialize the Google Search Service with API credentials."""
self.api_key = os.getenv("GOOGLE_SEARCH_API_KEY")
self.search_engine_id = os.getenv("GOOGLE_SEARCH_ENGINE_ID")
self.base_url = "https://www.googleapis.com/customsearch/v1"
if not self.api_key or not self.search_engine_id:
raise ValueError("Google Search API credentials not configured. Please set GOOGLE_SEARCH_API_KEY and GOOGLE_SEARCH_ENGINE_ID environment variables.")
else:
self.enabled = True
logger.info("Google Search Service initialized successfully")
async def search_industry_trends(
    self,
    topic: str,
    industry: str,
    max_results: int = 10
) -> Dict[str, Any]:
    """
    Search for current industry trends and insights.

    Args:
        topic: The specific topic to research.
        industry: The industry context for the search.
        max_results: Maximum number of search results to return.

    Returns:
        Dict with ranked "sources", "key_insights", "statistics", the query
        used and a timestamp. (The previous ``List[...]`` return annotation
        was wrong — this method has always returned a dict.)

    Raises:
        RuntimeError: If the service is not enabled, or if the search fails.
    """
    if not self.enabled:
        raise RuntimeError("Google Search Service is not enabled. Please configure API credentials.")
    try:
        # Construct industry-specific search query
        search_query = self._build_search_query(topic, industry)
        logger.info(f"Searching for: {search_query}")
        # Perform the search, then rank results and distill insights
        search_results = await self._perform_search(search_query, max_results)
        processed_results = await self._process_search_results(search_results, topic, industry)
        insights = await self._extract_insights(processed_results, topic, industry)
        logger.info(f"Search completed successfully. Found {len(processed_results)} relevant sources.")
        return {
            "sources": processed_results,
            "key_insights": insights["insights"],
            "statistics": insights["statistics"],
            "grounding_enabled": True,
            "search_query": search_query,
            "timestamp": datetime.utcnow().isoformat()
        }
    except Exception as e:
        logger.error(f"Google search failed: {str(e)}")
        # Chain the original exception so callers can see the root cause.
        raise RuntimeError(f"Google search failed: {str(e)}") from e
def _build_search_query(self, topic: str, industry: str) -> str:
"""
Build an optimized search query for industry research.
Args:
topic: The specific topic to research
industry: The industry context
Returns:
Optimized search query string
"""
# Add industry-specific terms and current year for relevance
current_year = datetime.now().year
# Industry-specific search patterns
industry_patterns = {
"Technology": ["trends", "innovations", "developments", "insights"],
"Healthcare": ["advances", "research", "treatments", "studies"],
"Finance": ["market analysis", "trends", "reports", "insights"],
"Marketing": ["strategies", "trends", "best practices", "case studies"],
"Education": ["innovations", "trends", "research", "best practices"]
}
# Get industry-specific terms
industry_terms = industry_patterns.get(industry, ["trends", "insights", "developments"])
# Build the query
query_components = [
topic,
industry,
f"{current_year}",
"latest",
"trends",
"insights"
]
# Add industry-specific terms
query_components.extend(industry_terms[:2])
return " ".join(query_components)
async def _perform_search(self, query: str, max_results: int) -> List[Dict[str, Any]]:
    """Execute one Google Custom Search API request.

    Args:
        query: The search query to execute.
        max_results: Maximum number of results (capped at Google's 10 per call).

    Returns:
        The raw "items" list from the API response (empty if none).

    Raises:
        Exception: If the API responds with a non-200 status.
    """
    request_params = {
        "key": self.api_key,
        "cx": self.search_engine_id,
        "q": query,
        "num": min(max_results, 10),  # Google CSE hard limit per request
        "dateRestrict": "m1",         # only results from the last month
        "sort": "date",               # newest first, for current information
        "safe": "active"              # safe search for professional content
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(self.base_url, params=request_params) as response:
            if response.status != 200:
                error_text = await response.text()
                logger.error(f"Google Search API error: {response.status} - {error_text}")
                raise Exception(f"Search API returned status {response.status}")
            payload = await response.json()
            return payload.get("items", [])
async def _process_search_results(
self,
raw_results: List[Dict[str, Any]],
topic: str,
industry: str
) -> List[Dict[str, Any]]:
"""
Process and rank search results by relevance and credibility.
Args:
raw_results: Raw search results from Google API
topic: The research topic for relevance scoring
industry: The industry context for relevance scoring
Returns:
Processed and ranked search results
"""
processed_results = []
for result in raw_results:
try:
# Extract basic information
title = result.get("title", "")
url = result.get("link", "")
snippet = result.get("snippet", "")
# Calculate relevance score
relevance_score = self._calculate_relevance_score(title, snippet, topic, industry)
# Calculate credibility score
credibility_score = self._calculate_credibility_score(url, title)
# Extract publication date if available
publication_date = self._extract_publication_date(result)
# Calculate domain authority
domain_authority = self._calculate_domain_authority(url)
processed_result = {
"title": title,
"url": url,
"content": snippet,
"relevance_score": relevance_score,
"credibility_score": credibility_score,
"domain_authority": domain_authority,
"publication_date": publication_date,
"source_type": self._categorize_source(url, title),
"raw_result": result
}
processed_results.append(processed_result)
except Exception as e:
logger.warning(f"Failed to process search result: {str(e)}")
continue
# Sort by combined score (relevance + credibility)
processed_results.sort(
key=lambda x: (x["relevance_score"] + x["credibility_score"]) / 2,
reverse=True
)
return processed_results
def _calculate_relevance_score(self, title: str, snippet: str, topic: str, industry: str) -> float:
"""
Calculate relevance score based on topic and industry alignment.
Args:
title: The title of the search result
snippet: The snippet/description of the result
topic: The research topic
industry: The industry context
Returns:
Relevance score between 0.0 and 1.0
"""
score = 0.0
text = f"{title} {snippet}".lower()
# Topic relevance (40% of score)
topic_words = topic.lower().split()
topic_matches = sum(1 for word in topic_words if word in text)
topic_score = min(topic_matches / len(topic_words), 1.0) * 0.4
# Industry relevance (30% of score)
industry_words = industry.lower().split()
industry_matches = sum(1 for word in industry_words if word in text)
industry_score = min(industry_matches / len(industry_words), 1.0) * 0.3
# Content quality indicators (30% of score)
quality_indicators = [
"research", "study", "analysis", "report", "insights",
"trends", "data", "statistics", "findings", "expert"
]
quality_matches = sum(1 for indicator in quality_indicators if indicator in text)
quality_score = min(quality_matches / len(quality_indicators), 1.0) * 0.3
score = topic_score + industry_score + quality_score
return round(score, 3)
def _calculate_credibility_score(self, url: str, title: str) -> float:
"""
Calculate credibility score based on URL and title analysis.
Args:
url: The URL of the source
title: The title of the content
Returns:
Credibility score between 0.0 and 1.0
"""
score = 0.5 # Base score
# Domain credibility indicators
credible_domains = [
"harvard.edu", "stanford.edu", "mit.edu", "berkeley.edu", # Academic
"forbes.com", "bloomberg.com", "reuters.com", "wsj.com", # Business
"nature.com", "science.org", "ieee.org", "acm.org", # Scientific
"linkedin.com", "medium.com", "substack.com" # Professional
]
# Check if domain is in credible list
domain = self._extract_domain(url)
if any(credible_domain in domain for credible_domain in credible_domains):
score += 0.3
# Title credibility indicators
credible_indicators = [
"research", "study", "analysis", "report", "insights",
"expert", "professional", "industry", "trends"
]
title_lower = title.lower()
credible_matches = sum(1 for indicator in credible_indicators if indicator in title_lower)
score += min(credible_matches * 0.1, 0.2)
return round(min(score, 1.0), 3)
def _calculate_domain_authority(self, url: str) -> float:
"""
Calculate domain authority based on URL analysis.
Args:
url: The URL to analyze
Returns:
Domain authority score between 0.0 and 1.0
"""
domain = self._extract_domain(url)
# High authority domains
high_authority = [
"harvard.edu", "stanford.edu", "mit.edu", "berkeley.edu",
"forbes.com", "bloomberg.com", "reuters.com", "wsj.com",
"nature.com", "science.org", "ieee.org", "acm.org"
]
# Medium authority domains
medium_authority = [
"linkedin.com", "medium.com", "substack.com", "techcrunch.com",
"venturebeat.com", "wired.com", "theverge.com"
]
if any(auth_domain in domain for auth_domain in high_authority):
return 0.9
elif any(auth_domain in domain for auth_domain in medium_authority):
return 0.7
else:
# Basic scoring for other domains
return 0.5
def _extract_domain(self, url: str) -> str:
"""Extract domain from URL."""
try:
from urllib.parse import urlparse
parsed = urlparse(url)
return parsed.netloc.lower()
except:
return url.lower()
def _extract_publication_date(self, result: Dict[str, Any]) -> Optional[str]:
"""Extract publication date from search result if available."""
# Check for various date fields
date_fields = ["pagemap", "metatags", "date"]
for field in date_fields:
if field in result:
date_value = result[field]
if isinstance(date_value, dict):
# Look for common date keys
for date_key in ["date", "pubdate", "article:published_time"]:
if date_key in date_value:
return date_value[date_key]
elif isinstance(date_value, str):
return date_value
return None
def _categorize_source(self, url: str, title: str) -> str:
"""Categorize the source type based on URL and title."""
domain = self._extract_domain(url)
title_lower = title.lower()
# Academic sources
if any(edu in domain for edu in [".edu", "harvard", "stanford", "mit"]):
return "academic"
# Business/News sources
if any(biz in domain for biz in ["forbes", "bloomberg", "reuters", "wsj"]):
return "business_news"
# Professional platforms
if any(prof in domain for prof in ["linkedin", "medium", "substack"]):
return "professional_platform"
# Research/Scientific
if any(research in domain for research in ["nature", "science", "ieee", "acm"]):
return "research_scientific"
# Industry reports
if any(report in title_lower for report in ["report", "study", "analysis", "research"]):
return "industry_report"
return "general"
async def _extract_insights(
self,
sources: List[Dict[str, Any]],
topic: str,
industry: str
) -> Dict[str, List[str]]:
"""
Extract key insights and statistics from search results.
Args:
sources: Processed search results
topic: The research topic
industry: The industry context
Returns:
Dictionary containing insights and statistics
"""
insights = []
statistics = []
# Extract insights from top sources
top_sources = sources[:5] # Top 5 most relevant sources
for source in top_sources:
content = source.get("content", "")
# Look for insight patterns
insight_patterns = [
"shows", "indicates", "suggests", "reveals", "demonstrates",
"highlights", "emphasizes", "points to", "suggests that"
]
for pattern in insight_patterns:
if pattern in content.lower():
# Extract the sentence containing the insight
sentences = content.split(". ")
for sentence in sentences:
if pattern in sentence.lower():
insights.append(sentence.strip())
break
# Look for statistical patterns
stat_patterns = [
r'\d+%', # Percentages
r'\d+ percent', # Written percentages
r'\$\d+', # Dollar amounts
r'\d+ million', # Millions
r'\d+ billion', # Billions
r'\d+ out of \d+', # Ratios
]
import re
for pattern in stat_patterns:
matches = re.findall(pattern, content, re.IGNORECASE)
for match in matches:
statistics.append(f"{match}")
# Limit the number of insights and statistics
insights = insights[:10] # Top 10 insights
statistics = statistics[:10] # Top 10 statistics
return {
"insights": insights,
"statistics": statistics
}
async def test_api_connection(self) -> Dict[str, Any]:
    """Probe the Google Custom Search API with a one-result query.

    Returns:
        Status payload describing whether the API is reachable and configured.

    Raises:
        RuntimeError: If the service is not enabled.
    """
    if not self.enabled:
        raise RuntimeError("Google Search Service is not enabled. Please configure API credentials.")
    try:
        # A single-result probe is the cheapest way to confirm connectivity.
        probe_results = await self._perform_search("AI technology trends 2024", 1)
        return {
            "status": "success",
            "message": "Google Search API connection successful",
            "enabled": True,
            "test_results_count": len(probe_results),
            "api_key_configured": bool(self.api_key),
            "search_engine_configured": bool(self.search_engine_id)
        }
    except Exception as e:
        return {
            "status": "error",
            "message": f"Google Search API connection failed: {str(e)}",
            "enabled": False,
            "error": str(e)
        }

View File

@@ -0,0 +1,23 @@
"""
Research Intent Package
This package provides intent-driven research capabilities:
- Intent inference from user input
- Targeted query generation
- Intent-aware result analysis
Author: ALwrity Team
Version: 1.0
"""
from .research_intent_inference import ResearchIntentInference
from .intent_query_generator import IntentQueryGenerator
from .intent_aware_analyzer import IntentAwareAnalyzer
from .intent_prompt_builder import IntentPromptBuilder
__all__ = [
"ResearchIntentInference",
"IntentQueryGenerator",
"IntentAwareAnalyzer",
"IntentPromptBuilder",
]

View File

@@ -0,0 +1,547 @@
"""
Intent-Aware Result Analyzer
Analyzes research results based on user intent.
Extracts exactly what the user needs from raw research data.
This is the key innovation - instead of generic analysis,
we analyze results through the lens of what the user wants to accomplish.
Author: ALwrity Team
Version: 1.0
"""
import json
from typing import Dict, Any, List, Optional
from loguru import logger
from models.research_intent_models import (
ResearchIntent,
IntentDrivenResearchResult,
ExpectedDeliverable,
StatisticWithCitation,
ExpertQuote,
CaseStudySummary,
TrendAnalysis,
ComparisonTable,
ComparisonItem,
ProsCons,
SourceWithRelevance,
)
from models.research_persona_models import ResearchPersona
from .intent_prompt_builder import IntentPromptBuilder
class IntentAwareAnalyzer:
"""
Analyzes research results based on user intent.
Instead of generic summaries, this extracts exactly what the user
needs: statistics, quotes, case studies, trends, etc.
"""
def __init__(self):
    """Initialize the analyzer with its intent-aware prompt builder."""
    # The builder assembles analysis prompts from intent + persona context.
    self.prompt_builder = IntentPromptBuilder()
    logger.info("IntentAwareAnalyzer initialized")
async def analyze(
    self,
    raw_results: Dict[str, Any],
    intent: ResearchIntent,
    research_persona: Optional[ResearchPersona] = None,
) -> IntentDrivenResearchResult:
    """
    Analyze raw research results based on user intent.

    Args:
        raw_results: Raw results from Exa/Tavily/Google.
        intent: The user's research intent.
        research_persona: Optional persona for context.

    Returns:
        IntentDrivenResearchResult with extracted deliverables; a fallback
        result is returned on any failure.
    """
    try:
        logger.info(f"Analyzing results for intent: {intent.primary_question[:50]}...")
        formatted = self._format_raw_results(raw_results)
        analysis_prompt = self.prompt_builder.build_intent_aware_analysis_prompt(
            raw_results=formatted,
            intent=intent,
            research_persona=research_persona,
        )
        # The schema shapes the LLM's structured output by deliverable type.
        schema = self._build_analysis_schema(intent.expected_deliverables)
        # Local import: resolved only when an analysis actually runs.
        from services.llm_providers.main_text_generation import llm_text_gen
        llm_response = llm_text_gen(
            prompt=analysis_prompt,
            json_struct=schema,
            user_id=None
        )
        if isinstance(llm_response, dict) and "error" in llm_response:
            logger.error(f"Intent-aware analysis failed: {llm_response.get('error')}")
            return self._create_fallback_result(raw_results, intent)
        parsed = self._parse_analysis_result(llm_response, intent, raw_results)
        logger.info(
            f"Analysis complete: {len(parsed.key_takeaways)} takeaways, "
            f"{len(parsed.statistics)} stats, "
            f"{len(parsed.sources)} sources"
        )
        return parsed
    except Exception as e:
        logger.error(f"Error in intent-aware analysis: {e}")
        return self._create_fallback_result(raw_results, intent)
def _format_raw_results(self, raw_results: Dict[str, Any]) -> str:
"""Format raw research results for LLM analysis."""
formatted_parts = []
# Extract content
content = raw_results.get("content", "")
if content:
formatted_parts.append(f"=== MAIN CONTENT ===\n{content[:8000]}")
# Extract sources with their content
sources = raw_results.get("sources", [])
if sources:
formatted_parts.append("\n=== SOURCES ===")
for i, source in enumerate(sources[:15], 1): # Limit to 15 sources
title = source.get("title", "Untitled")
url = source.get("url", "")
excerpt = source.get("excerpt", source.get("text", source.get("content", "")))
formatted_parts.append(f"\nSource {i}: {title}")
formatted_parts.append(f"URL: {url}")
if excerpt:
formatted_parts.append(f"Content: {excerpt[:500]}")
# Extract grounding metadata if available (from Google)
grounding = raw_results.get("grounding_metadata", {})
if grounding:
formatted_parts.append("\n=== GROUNDING DATA ===")
formatted_parts.append(json.dumps(grounding, indent=2)[:2000])
# Extract any AI answers (from Tavily)
answer = raw_results.get("answer", "")
if answer:
formatted_parts.append(f"\n=== AI-GENERATED ANSWER ===\n{answer}")
return "\n".join(formatted_parts)
def _build_analysis_schema(self, expected_deliverables: List[str]) -> Dict[str, Any]:
    """Build the JSON schema handed to the LLM, shaped by expected deliverables.

    A base schema (primary answer, summary, takeaways, confidence, gaps,
    follow-ups) is always present; deliverable-specific properties
    (statistics, expert quotes, case studies, trends, comparisons, pros/cons,
    best practices, steps, definitions, examples, predictions) are added only
    when the corresponding ExpectedDeliverable value is requested. Sources
    and a suggested outline are always appended.

    Args:
        expected_deliverables: ExpectedDeliverable enum *values* (strings)
            requested by the inferred intent.

    Returns:
        A JSON-schema dict suitable for structured LLM output.
    """
    # Base schema
    schema = {
        "type": "object",
        "properties": {
            "primary_answer": {"type": "string"},
            "secondary_answers": {
                "type": "object",
                "additionalProperties": {"type": "string"}
            },
            "executive_summary": {"type": "string"},
            "key_takeaways": {
                "type": "array",
                "items": {"type": "string"},
                "maxItems": 7
            },
            "confidence": {"type": "number"},
            "gaps_identified": {
                "type": "array",
                "items": {"type": "string"}
            },
            "follow_up_queries": {
                "type": "array",
                "items": {"type": "string"}
            },
        },
        "required": ["primary_answer", "executive_summary", "key_takeaways", "confidence"]
    }
    # Add deliverable-specific properties
    if ExpectedDeliverable.KEY_STATISTICS.value in expected_deliverables:
        schema["properties"]["statistics"] = {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "statistic": {"type": "string"},
                    "value": {"type": "string"},
                    "context": {"type": "string"},
                    "source": {"type": "string"},
                    "url": {"type": "string"},
                    "credibility": {"type": "number"},
                    "recency": {"type": "string"}
                },
                "required": ["statistic", "context", "source", "url"]
            }
        }
    if ExpectedDeliverable.EXPERT_QUOTES.value in expected_deliverables:
        schema["properties"]["expert_quotes"] = {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "quote": {"type": "string"},
                    "speaker": {"type": "string"},
                    "title": {"type": "string"},
                    "organization": {"type": "string"},
                    "source": {"type": "string"},
                    "url": {"type": "string"}
                },
                "required": ["quote", "speaker", "source", "url"]
            }
        }
    if ExpectedDeliverable.CASE_STUDIES.value in expected_deliverables:
        schema["properties"]["case_studies"] = {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    "organization": {"type": "string"},
                    "challenge": {"type": "string"},
                    "solution": {"type": "string"},
                    "outcome": {"type": "string"},
                    "key_metrics": {"type": "array", "items": {"type": "string"}},
                    "source": {"type": "string"},
                    "url": {"type": "string"}
                },
                "required": ["title", "organization", "challenge", "solution", "outcome"]
            }
        }
    if ExpectedDeliverable.TRENDS.value in expected_deliverables:
        schema["properties"]["trends"] = {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "trend": {"type": "string"},
                    "direction": {"type": "string"},
                    "evidence": {"type": "array", "items": {"type": "string"}},
                    "impact": {"type": "string"},
                    "timeline": {"type": "string"},
                    "sources": {"type": "array", "items": {"type": "string"}}
                },
                "required": ["trend", "direction", "evidence"]
            }
        }
    if ExpectedDeliverable.COMPARISONS.value in expected_deliverables:
        schema["properties"]["comparisons"] = {
            "type": "array",
            "items": {
                "type": "object",
                "properties": {
                    "title": {"type": "string"},
                    "criteria": {"type": "array", "items": {"type": "string"}},
                    "items": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "name": {"type": "string"},
                                "pros": {"type": "array", "items": {"type": "string"}},
                                "cons": {"type": "array", "items": {"type": "string"}},
                                "features": {"type": "object"}
                            }
                        }
                    },
                    "verdict": {"type": "string"}
                }
            }
        }
    if ExpectedDeliverable.PROS_CONS.value in expected_deliverables:
        schema["properties"]["pros_cons"] = {
            "type": "object",
            "properties": {
                "subject": {"type": "string"},
                "pros": {"type": "array", "items": {"type": "string"}},
                "cons": {"type": "array", "items": {"type": "string"}},
                "balanced_verdict": {"type": "string"}
            }
        }
    if ExpectedDeliverable.BEST_PRACTICES.value in expected_deliverables:
        schema["properties"]["best_practices"] = {
            "type": "array",
            "items": {"type": "string"}
        }
    if ExpectedDeliverable.STEP_BY_STEP.value in expected_deliverables:
        schema["properties"]["step_by_step"] = {
            "type": "array",
            "items": {"type": "string"}
        }
    if ExpectedDeliverable.DEFINITIONS.value in expected_deliverables:
        schema["properties"]["definitions"] = {
            "type": "object",
            "additionalProperties": {"type": "string"}
        }
    if ExpectedDeliverable.EXAMPLES.value in expected_deliverables:
        schema["properties"]["examples"] = {
            "type": "array",
            "items": {"type": "string"}
        }
    if ExpectedDeliverable.PREDICTIONS.value in expected_deliverables:
        schema["properties"]["predictions"] = {
            "type": "array",
            "items": {"type": "string"}
        }
    # Always include sources and suggested outline
    schema["properties"]["sources"] = {
        "type": "array",
        "items": {
            "type": "object",
            "properties": {
                "title": {"type": "string"},
                "url": {"type": "string"},
                "relevance_score": {"type": "number"},
                "relevance_reason": {"type": "string"},
                "content_type": {"type": "string"},
                "credibility_score": {"type": "number"}
            },
            "required": ["title", "url"]
        }
    }
    schema["properties"]["suggested_outline"] = {
        "type": "array",
        "items": {"type": "string"}
    }
    return schema
def _parse_analysis_result(
    self,
    result: Dict[str, Any],
    intent: ResearchIntent,
    raw_results: Dict[str, Any],
) -> IntentDrivenResearchResult:
    """Parse LLM analysis result into structured format.

    Converts the raw JSON dict returned by the analysis LLM into a fully
    typed IntentDrivenResearchResult. Each deliverable section
    (statistics, expert quotes, case studies, trends, comparisons,
    pros/cons, sources) is parsed inside its own try/except so that one
    malformed item is logged and skipped without discarding the rest of
    the result.

    Args:
        result: Parsed JSON object produced by the analysis LLM call.
        intent: The original research intent; attached to the returned
            result and used as a fallback subject for pros/cons.
        raw_results: Aggregated raw provider results; used to back-fill
            sources when the LLM returned none, and for raw_content.

    Returns:
        IntentDrivenResearchResult with success=True and every parseable
        section populated (unparseable items are dropped with a warning).
    """
    # Parse statistics
    statistics = []
    for stat in result.get("statistics", []):
        try:
            statistics.append(StatisticWithCitation(
                statistic=stat.get("statistic", ""),
                value=stat.get("value"),
                context=stat.get("context", ""),
                source=stat.get("source", ""),
                url=stat.get("url", ""),
                credibility=float(stat.get("credibility", 0.8)),  # default: fairly credible
                recency=stat.get("recency"),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse statistic: {e}")

    # Parse expert quotes
    expert_quotes = []
    for quote in result.get("expert_quotes", []):
        try:
            expert_quotes.append(ExpertQuote(
                quote=quote.get("quote", ""),
                speaker=quote.get("speaker", ""),
                title=quote.get("title"),
                organization=quote.get("organization"),
                context=quote.get("context"),
                source=quote.get("source", ""),
                url=quote.get("url", ""),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse expert quote: {e}")

    # Parse case studies
    case_studies = []
    for cs in result.get("case_studies", []):
        try:
            case_studies.append(CaseStudySummary(
                title=cs.get("title", ""),
                organization=cs.get("organization", ""),
                challenge=cs.get("challenge", ""),
                solution=cs.get("solution", ""),
                outcome=cs.get("outcome", ""),
                key_metrics=cs.get("key_metrics", []),
                source=cs.get("source", ""),
                url=cs.get("url", ""),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse case study: {e}")

    # Parse trends
    trends = []
    for trend in result.get("trends", []):
        try:
            trends.append(TrendAnalysis(
                trend=trend.get("trend", ""),
                direction=trend.get("direction", "growing"),  # schema allows growing/declining/emerging/stable
                evidence=trend.get("evidence", []),
                impact=trend.get("impact"),
                timeline=trend.get("timeline"),
                sources=trend.get("sources", []),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse trend: {e}")

    # Parse comparisons
    # NOTE: items are parsed inside the same try as the table, so one bad
    # item drops the whole comparison table (logged below).
    comparisons = []
    for comp in result.get("comparisons", []):
        try:
            items = []
            for item in comp.get("items", []):
                items.append(ComparisonItem(
                    name=item.get("name", ""),
                    description=item.get("description"),
                    pros=item.get("pros", []),
                    cons=item.get("cons", []),
                    features=item.get("features", {}),
                    rating=item.get("rating"),
                    source=item.get("source"),
                ))
            comparisons.append(ComparisonTable(
                title=comp.get("title", ""),
                criteria=comp.get("criteria", []),
                items=items,
                winner=comp.get("winner"),
                verdict=comp.get("verdict"),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse comparison: {e}")

    # Parse pros/cons (single optional object, not a list)
    pros_cons = None
    pc_data = result.get("pros_cons")
    if pc_data:
        try:
            pros_cons = ProsCons(
                subject=pc_data.get("subject", intent.original_input),
                pros=pc_data.get("pros", []),
                cons=pc_data.get("cons", []),
                balanced_verdict=pc_data.get("balanced_verdict", ""),
            )
        except Exception as e:
            logger.warning(f"Failed to parse pros/cons: {e}")

    # Parse sources
    sources = []
    for src in result.get("sources", []):
        try:
            sources.append(SourceWithRelevance(
                title=src.get("title", ""),
                url=src.get("url", ""),
                excerpt=src.get("excerpt"),
                relevance_score=float(src.get("relevance_score", 0.8)),
                relevance_reason=src.get("relevance_reason"),
                content_type=src.get("content_type"),
                published_date=src.get("published_date"),
                credibility_score=float(src.get("credibility_score", 0.8)),
            ))
        except Exception as e:
            logger.warning(f"Failed to parse source: {e}")

    # If no sources from analysis, extract from raw results
    if not sources:
        sources = self._extract_sources_from_raw(raw_results)

    return IntentDrivenResearchResult(
        success=True,
        primary_answer=result.get("primary_answer", ""),
        secondary_answers=result.get("secondary_answers", {}),
        statistics=statistics,
        expert_quotes=expert_quotes,
        case_studies=case_studies,
        comparisons=comparisons,
        trends=trends,
        best_practices=result.get("best_practices", []),
        step_by_step=result.get("step_by_step", []),
        pros_cons=pros_cons,
        definitions=result.get("definitions", {}),
        examples=result.get("examples", []),
        predictions=result.get("predictions", []),
        executive_summary=result.get("executive_summary", ""),
        key_takeaways=result.get("key_takeaways", []),
        suggested_outline=result.get("suggested_outline", []),
        sources=sources,
        raw_content=self._format_raw_results(raw_results)[:5000],  # cap stored raw text
        confidence=float(result.get("confidence", 0.7)),
        gaps_identified=result.get("gaps_identified", []),
        follow_up_queries=result.get("follow_up_queries", []),
        original_intent=intent,
    )
def _extract_sources_from_raw(self, raw_results: Dict[str, Any]) -> List[SourceWithRelevance]:
    """Extract sources from raw results when analysis doesn't provide them.

    Fallback used when the AI analysis step returns no ``sources`` list.
    Only the first 10 raw sources are considered; entries that still fail
    model validation are skipped with a warning.

    Args:
        raw_results: Aggregated provider output; expected to contain a
            ``sources`` list of dicts with title/url/excerpt-like keys.

    Returns:
        List of SourceWithRelevance with a neutral default relevance
        (0.8), since no AI scoring was performed.
    """
    sources = []
    for src in raw_results.get("sources", [])[:10]:
        try:
            # Use `or` chains instead of dict.get defaults: providers may
            # set keys to an explicit None, and `None[:200]` / float(None)
            # would raise and silently drop the source.
            excerpt = (src.get("excerpt") or src.get("text") or "")[:200]
            sources.append(SourceWithRelevance(
                title=src.get("title") or "Untitled",
                url=src.get("url") or "",
                excerpt=excerpt,
                relevance_score=0.8,  # neutral default; normally set by analysis
                credibility_score=float(src.get("credibility_score") or 0.8),
            ))
        except Exception as e:
            logger.warning(f"Failed to extract source: {e}")
    return sources
def _create_fallback_result(
    self,
    raw_results: Dict[str, Any],
    intent: ResearchIntent,
) -> IntentDrivenResearchResult:
    """Create a fallback result when AI analysis fails.

    Produces a minimal but valid IntentDrivenResearchResult from the raw
    provider output so the caller always gets a usable object: sources are
    extracted directly, takeaways are derived from the first sentences of
    the raw content, and confidence is lowered to 0.5 with explicit gap
    notes flagging that manual review is needed.

    Args:
        raw_results: Aggregated raw provider results.
        intent: The original research intent (echoed in the result).

    Returns:
        A low-confidence IntentDrivenResearchResult built without AI analysis.
    """
    # Extract basic information from raw results
    content = raw_results.get("content", "")
    sources = self._extract_sources_from_raw(raw_results)

    # Create basic takeaways from the first few sentences of the content.
    key_takeaways = []
    if content:
        sentences = content.split(". ")[:5]
        # rstrip(".") before re-appending the period: splitting on ". "
        # leaves the trailing "." on the final fragment, which previously
        # produced a double period ("..") in the last takeaway.
        key_takeaways = [
            s.strip().rstrip(".") + "."
            for s in sentences
            if len(s.strip()) > 20  # skip fragments too short to be meaningful
        ]

    return IntentDrivenResearchResult(
        success=True,
        primary_answer=f"Research findings for: {intent.primary_question}",
        secondary_answers={},
        executive_summary=content[:300] if content else "Research completed",
        key_takeaways=key_takeaways,
        sources=sources,
        raw_content=self._format_raw_results(raw_results)[:5000],
        confidence=0.5,  # low: results were not AI-analyzed
        gaps_identified=[
            "AI analysis failed - showing raw results",
            "Manual review recommended"
        ],
        follow_up_queries=[],
        original_intent=intent,
    )

View File

@@ -0,0 +1,627 @@
"""
Intent Prompt Builder
Builds comprehensive AI prompts for:
1. Intent inference from user input
2. Targeted query generation
3. Intent-aware result analysis
Author: ALwrity Team
Version: 1.0
"""
import json
from typing import Dict, Any, List, Optional
from loguru import logger
from models.research_intent_models import (
ResearchIntent,
ResearchPurpose,
ContentOutput,
ExpectedDeliverable,
ResearchDepthLevel,
)
from models.research_persona_models import ResearchPersona
class IntentPromptBuilder:
    """Builds prompts for intent-driven research.

    Produces three families of prompts:

    1. Intent inference  -- turn raw user input into a structured research intent.
    2. Query generation  -- targeted search queries, one per expected deliverable.
    3. Result analysis   -- intent-aware extraction from raw search results.
    """

    # Purpose explanations for the AI
    PURPOSE_EXPLANATIONS = {
        ResearchPurpose.LEARN: "User wants to understand a topic for personal knowledge",
        ResearchPurpose.CREATE_CONTENT: "User will create content (blog, video, podcast) from this research",
        ResearchPurpose.MAKE_DECISION: "User needs to make a choice/decision based on research",
        ResearchPurpose.COMPARE: "User wants to compare alternatives or competitors",
        ResearchPurpose.SOLVE_PROBLEM: "User is looking for a solution to a specific problem",
        ResearchPurpose.FIND_DATA: "User needs specific statistics, facts, or citations",
        ResearchPurpose.EXPLORE_TRENDS: "User wants to understand current/future trends",
        ResearchPurpose.VALIDATE: "User wants to verify or fact-check information",
        ResearchPurpose.GENERATE_IDEAS: "User wants to brainstorm content ideas",
    }

    # Deliverable descriptions
    DELIVERABLE_DESCRIPTIONS = {
        ExpectedDeliverable.KEY_STATISTICS: "Numbers, percentages, data points with citations",
        ExpectedDeliverable.EXPERT_QUOTES: "Authoritative quotes from industry experts",
        ExpectedDeliverable.CASE_STUDIES: "Real examples and success stories",
        ExpectedDeliverable.COMPARISONS: "Side-by-side analysis tables",
        ExpectedDeliverable.TRENDS: "Current and emerging industry trends",
        ExpectedDeliverable.BEST_PRACTICES: "Recommended approaches and guidelines",
        ExpectedDeliverable.STEP_BY_STEP: "Process guides and how-to instructions",
        ExpectedDeliverable.PROS_CONS: "Advantages and disadvantages analysis",
        ExpectedDeliverable.DEFINITIONS: "Clear explanations of concepts and terms",
        ExpectedDeliverable.CITATIONS: "Authoritative sources for reference",
        ExpectedDeliverable.EXAMPLES: "Concrete examples to illustrate points",
        ExpectedDeliverable.PREDICTIONS: "Future outlook and predictions",
    }

    def _purpose_explanation(self, purpose: str) -> str:
        """Return the human-readable explanation for a purpose value.

        ``ResearchPurpose(purpose)`` raises ValueError for unknown values
        *before* a dict ``.get`` fallback can apply, so the conversion is
        guarded here (mirrors the guard in _build_deliverables_instructions).
        Unknown purposes fall back to the raw string.
        """
        try:
            return self.PURPOSE_EXPLANATIONS.get(ResearchPurpose(purpose), purpose)
        except ValueError:
            return purpose

    def _deliverable_description(self, deliverable: str) -> str:
        """Return the description for a deliverable value, tolerating unknown values."""
        try:
            return self.DELIVERABLE_DESCRIPTIONS.get(ExpectedDeliverable(deliverable), deliverable)
        except ValueError:
            return deliverable

    def build_intent_inference_prompt(
        self,
        user_input: str,
        keywords: List[str],
        research_persona: Optional[ResearchPersona] = None,
        competitor_data: Optional[List[Dict]] = None,
        industry: Optional[str] = None,
        target_audience: Optional[str] = None,
    ) -> str:
        """
        Build prompt for inferring user's research intent.

        This prompt analyzes the user's input and determines:
        - What they want to accomplish
        - What questions they need answered
        - What specific deliverables they need

        Args:
            user_input: Raw user text (keywords, question, or goal statement).
            keywords: Optional explicit keywords supplied alongside the input.
            research_persona: Optional persona providing industry/audience context.
            competitor_data: Optional list of competitor dicts (name/domain/url keys).
            industry: Fallback industry string when no persona is available.
            target_audience: Fallback audience string when no persona is available.

        Returns:
            The complete prompt string for the intent-inference LLM call.
        """
        # Build persona context
        persona_context = self._build_persona_context(research_persona, industry, target_audience)

        # Build competitor context
        competitor_context = self._build_competitor_context(competitor_data)

        prompt = f"""You are an expert research intent analyzer. Your job is to understand what a content creator REALLY needs from their research.

## USER INPUT

"{user_input}"

{f"KEYWORDS: {', '.join(keywords)}" if keywords else ""}

## USER CONTEXT

{persona_context}
{competitor_context}

## YOUR TASK

Analyze the user's input and infer their research intent. Determine:

1. **INPUT TYPE**: Is this:
   - "keywords": Simple topic keywords (e.g., "AI healthcare 2025")
   - "question": A specific question (e.g., "What are the best AI tools for healthcare?")
   - "goal": A goal statement (e.g., "I need to write a blog about AI in healthcare")
   - "mixed": Combination of above

2. **PRIMARY QUESTION**: What is the main question to answer? Convert their input into a clear question.

3. **SECONDARY QUESTIONS**: What related questions should also be answered? (3-5 questions)

4. **PURPOSE**: Why are they researching? Choose ONE:
   - "learn": Understand a topic for personal knowledge
   - "create_content": Create content (blog, video, podcast)
   - "make_decision": Make a choice between options
   - "compare": Compare alternatives/competitors
   - "solve_problem": Find a solution
   - "find_data": Get specific statistics/facts
   - "explore_trends": Understand industry trends
   - "validate": Verify claims/information
   - "generate_ideas": Brainstorm ideas

5. **CONTENT OUTPUT**: What will they create? Choose ONE:
   - "blog", "podcast", "video", "social_post", "newsletter", "presentation", "report", "whitepaper", "email", "general"

6. **EXPECTED DELIVERABLES**: What specific outputs do they need? Choose ALL that apply:
   - "key_statistics": Numbers, data points
   - "expert_quotes": Authoritative quotes
   - "case_studies": Real examples
   - "comparisons": Side-by-side analysis
   - "trends": Industry trends
   - "best_practices": Recommendations
   - "step_by_step": How-to guides
   - "pros_cons": Advantages/disadvantages
   - "definitions": Concept explanations
   - "citations": Source references
   - "examples": Concrete examples
   - "predictions": Future outlook

7. **DEPTH**: How deep should the research go?
   - "overview": Quick summary
   - "detailed": In-depth analysis
   - "expert": Comprehensive expert-level

8. **FOCUS AREAS**: What specific aspects should be researched? (2-4 areas)

9. **PERSPECTIVE**: From whose viewpoint? (e.g., "marketing manager", "small business owner")

10. **TIME SENSITIVITY**: Is recency important?
    - "real_time": Latest only (past 24-48 hours)
    - "recent": Past week/month
    - "historical": Include older content
    - "evergreen": Timeless content

11. **CONFIDENCE**: How confident are you in this inference? (0.0-1.0)
    - If < 0.7, set needs_clarification to true and provide clarifying_questions

## OUTPUT FORMAT

Return a JSON object:
```json
{{
    "input_type": "keywords|question|goal|mixed",
    "primary_question": "The main question to answer",
    "secondary_questions": ["question 1", "question 2", "question 3"],
    "purpose": "one of the purpose options",
    "content_output": "one of the content options",
    "expected_deliverables": ["deliverable1", "deliverable2"],
    "depth": "overview|detailed|expert",
    "focus_areas": ["area1", "area2"],
    "perspective": "target perspective or null",
    "time_sensitivity": "real_time|recent|historical|evergreen",
    "confidence": 0.85,
    "needs_clarification": false,
    "clarifying_questions": [],
    "analysis_summary": "Brief summary of what the user wants"
}}
```

## IMPORTANT RULES

1. Always convert vague input into a specific primary question
2. Infer deliverables based on purpose (e.g., create_content → statistics + examples)
3. Use persona context to refine perspective and focus areas
4. If input is ambiguous, provide clarifying questions
5. Default to "detailed" depth unless input suggests otherwise
6. For content creation, include relevant deliverables automatically
"""
        return prompt

    def build_query_generation_prompt(
        self,
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
    ) -> str:
        """
        Build prompt for generating targeted research queries.

        Generates multiple queries, each targeting a specific deliverable.

        Args:
            intent: The inferred research intent to generate queries for.
            research_persona: Optional persona whose suggested keywords are
                injected as additional context.

        Returns:
            The complete prompt string for the query-generation LLM call.
        """
        # Guarded lookups: a malformed deliverable/purpose value must not
        # crash prompt building (Enum(value) raises ValueError on unknowns).
        deliverables_list = "\n".join([
            f"- {d}: {self._deliverable_description(d)}"
            for d in intent.expected_deliverables
        ])

        persona_keywords = ""
        if research_persona and research_persona.suggested_keywords:
            persona_keywords = f"\nSUGGESTED KEYWORDS FROM PERSONA: {', '.join(research_persona.suggested_keywords[:10])}"

        prompt = f"""You are a research query optimizer. Generate multiple targeted search queries based on the user's research intent.

## RESEARCH INTENT

PRIMARY QUESTION: {intent.primary_question}

SECONDARY QUESTIONS:
{chr(10).join(f'- {q}' for q in intent.secondary_questions) if intent.secondary_questions else 'None'}

PURPOSE: {intent.purpose} - {self._purpose_explanation(intent.purpose)}

CONTENT OUTPUT: {intent.content_output}

EXPECTED DELIVERABLES:
{deliverables_list}

DEPTH: {intent.depth}
FOCUS AREAS: {', '.join(intent.focus_areas) if intent.focus_areas else 'General'}
PERSPECTIVE: {intent.perspective or 'General audience'}
TIME SENSITIVITY: {intent.time_sensitivity or 'No specific requirement'}
{persona_keywords}

## YOUR TASK

Generate 4-8 targeted research queries. Each query should:
1. Target a specific deliverable or question
2. Be optimized for semantic search (Exa/Tavily)
3. Include relevant context for better results

For each query, specify:
- The query string
- What deliverable it targets
- Best provider (exa for semantic/deep, tavily for news/real-time, google for factual)
- Priority (1-5, higher = more important)
- What we expect to find

## OUTPUT FORMAT

Return a JSON object:
```json
{{
    "queries": [
        {{
            "query": "Healthcare AI adoption statistics 2025 hospitals implementation data",
            "purpose": "key_statistics",
            "provider": "exa",
            "priority": 5,
            "expected_results": "Statistics on hospital AI adoption rates"
        }},
        {{
            "query": "AI healthcare trends predictions future outlook 2025 2026",
            "purpose": "trends",
            "provider": "tavily",
            "priority": 4,
            "expected_results": "Current trends and future predictions in healthcare AI"
        }}
    ],
    "enhanced_keywords": ["keyword1", "keyword2", "keyword3"],
    "research_angles": [
        "Angle 1: Focus on adoption challenges",
        "Angle 2: Focus on ROI and outcomes"
    ]
}}
```

## QUERY OPTIMIZATION RULES

1. For STATISTICS: Include words like "statistics", "data", "percentage", "report", "study"
2. For CASE STUDIES: Include "case study", "success story", "implementation", "example"
3. For TRENDS: Include "trends", "future", "predictions", "emerging", year numbers
4. For EXPERT QUOTES: Include expert names if known, or "expert opinion", "interview"
5. For COMPARISONS: Include "vs", "compare", "comparison", "alternative"
6. For NEWS/REAL-TIME: Use Tavily, include recent year/month
7. For ACADEMIC/DEEP: Use Exa with neural search
"""
        return prompt

    def build_intent_aware_analysis_prompt(
        self,
        raw_results: str,
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
    ) -> str:
        """
        Build prompt for analyzing research results based on user intent.

        This is the key prompt that extracts exactly what the user needs.

        Args:
            raw_results: Concatenated raw search results (truncated to
                ~15k characters to stay within token limits).
            intent: The inferred research intent driving the extraction.
            research_persona: Accepted for interface symmetry with the other
                builders; not currently used in this prompt.

        Returns:
            The complete prompt string for the analysis LLM call.
        """
        # Guarded lookup: unknown purpose values fall back to the raw string
        # instead of raising ValueError from the enum constructor.
        purpose_explanation = self._purpose_explanation(intent.purpose)

        deliverables_instructions = self._build_deliverables_instructions(intent.expected_deliverables)

        perspective_instruction = ""
        if intent.perspective:
            perspective_instruction = f"\n**PERSPECTIVE**: Analyze results from the viewpoint of: {intent.perspective}"

        # Truncate outside the f-string so the token-limit note stays a code
        # comment instead of leaking into the prompt sent to the LLM.
        truncated_results = raw_results[:15000]

        prompt = f"""You are a research analyst helping a content creator find exactly what they need. Your job is to analyze raw research results and extract precisely what the user is looking for.

## USER'S RESEARCH INTENT

PRIMARY QUESTION: {intent.primary_question}

SECONDARY QUESTIONS:
{chr(10).join(f'- {q}' for q in intent.secondary_questions) if intent.secondary_questions else 'None specified'}

PURPOSE: {intent.purpose}
{purpose_explanation}

CONTENT OUTPUT: {intent.content_output}
EXPECTED DELIVERABLES: {', '.join(intent.expected_deliverables)}
FOCUS AREAS: {', '.join(intent.focus_areas) if intent.focus_areas else 'General'}
{perspective_instruction}

## RAW RESEARCH RESULTS

{truncated_results}

## YOUR TASK

Analyze the raw research results and extract EXACTLY what the user needs.

{deliverables_instructions}

## OUTPUT REQUIREMENTS

Provide results in this JSON structure:
```json
{{
    "primary_answer": "Direct 2-3 sentence answer to the primary question",
    "secondary_answers": {{
        "Question 1?": "Answer to question 1",
        "Question 2?": "Answer to question 2"
    }},
    "executive_summary": "2-3 sentence executive summary of all findings",
    "key_takeaways": [
        "Key takeaway 1 - most important finding",
        "Key takeaway 2",
        "Key takeaway 3",
        "Key takeaway 4",
        "Key takeaway 5"
    ],
    "statistics": [
        {{
            "statistic": "72% of hospitals plan to adopt AI by 2025",
            "value": "72%",
            "context": "Survey of 500 US hospitals in 2024",
            "source": "Healthcare AI Report 2024",
            "url": "https://example.com/report",
            "credibility": 0.9,
            "recency": "2024"
        }}
    ],
    "expert_quotes": [
        {{
            "quote": "AI will revolutionize patient care within 5 years",
            "speaker": "Dr. Jane Smith",
            "title": "Chief Medical Officer",
            "organization": "HealthTech Inc",
            "source": "TechCrunch",
            "url": "https://example.com/article"
        }}
    ],
    "case_studies": [
        {{
            "title": "Mayo Clinic AI Implementation",
            "organization": "Mayo Clinic",
            "challenge": "High patient wait times",
            "solution": "AI-powered triage system",
            "outcome": "40% reduction in wait times",
            "key_metrics": ["40% faster triage", "95% patient satisfaction"],
            "source": "Healthcare IT News",
            "url": "https://example.com"
        }}
    ],
    "trends": [
        {{
            "trend": "AI-assisted diagnostics adoption",
            "direction": "growing",
            "evidence": ["25% YoY growth", "Major hospital chains investing"],
            "impact": "Could reduce misdiagnosis by 30%",
            "timeline": "Expected mainstream by 2027",
            "sources": ["url1", "url2"]
        }}
    ],
    "comparisons": [
        {{
            "title": "Top AI Healthcare Platforms",
            "criteria": ["Cost", "Features", "Support"],
            "items": [
                {{
                    "name": "Platform A",
                    "pros": ["Easy integration", "Good support"],
                    "cons": ["Higher cost"],
                    "features": {{"Cost": "$500/month", "Support": "24/7"}}
                }}
            ],
            "verdict": "Platform A best for large hospitals"
        }}
    ],
    "best_practices": [
        "Start with a pilot program before full deployment",
        "Ensure staff training is comprehensive"
    ],
    "step_by_step": [
        "Step 1: Assess current infrastructure",
        "Step 2: Define use cases",
        "Step 3: Select vendor"
    ],
    "pros_cons": {{
        "subject": "AI in Healthcare",
        "pros": ["Improved accuracy", "Cost savings"],
        "cons": ["Initial investment", "Training required"],
        "balanced_verdict": "Benefits outweigh costs for most hospitals"
    }},
    "definitions": {{
        "Clinical AI": "AI systems designed for medical diagnosis and treatment recommendations"
    }},
    "examples": [
        "Example: Hospital X reduced readmissions by 25% using predictive AI"
    ],
    "predictions": [
        "By 2030, AI will assist in 80% of initial diagnoses"
    ],
    "suggested_outline": [
        "1. Introduction: The AI Healthcare Revolution",
        "2. Current State: Where We Are Today",
        "3. Key Statistics and Trends",
        "4. Case Studies: Success Stories",
        "5. Implementation Guide",
        "6. Future Outlook"
    ],
    "sources": [
        {{
            "title": "Healthcare AI Report 2024",
            "url": "https://example.com",
            "relevance_score": 0.95,
            "relevance_reason": "Directly addresses adoption statistics",
            "content_type": "research report",
            "credibility_score": 0.9
        }}
    ],
    "confidence": 0.85,
    "gaps_identified": [
        "Specific cost data for small clinics not found",
        "Limited information on regulatory challenges"
    ],
    "follow_up_queries": [
        "AI healthcare regulations FDA 2025",
        "Small clinic AI implementation costs"
    ]
}}
```

## CRITICAL RULES

1. **ONLY include information directly from the raw results** - do not make up data
2. **ALWAYS include source URLs** for every statistic, quote, and case study
3. **If a deliverable type has no relevant data**, return an empty array for it
4. **Prioritize recency and credibility** when multiple sources conflict
5. **Answer the PRIMARY QUESTION directly** in 2-3 clear sentences
6. **Keep KEY TAKEAWAYS to 5-7 points** - the most important findings
7. **Add to gaps_identified** if expected information is missing
8. **Suggest follow_up_queries** for gaps or incomplete areas
9. **Rate confidence** based on how well results match the user's intent
10. **Include deliverables ONLY if they are in expected_deliverables** or critical to the question
"""
        return prompt

    def _build_persona_context(
        self,
        research_persona: Optional[ResearchPersona],
        industry: Optional[str],
        target_audience: Optional[str],
    ) -> str:
        """Build persona context section for prompts.

        Prefers the full research persona; falls back to bare industry /
        audience strings, then to an explicit "no context" line.
        """
        if not research_persona and not industry:
            return "No specific persona context available."

        context_parts = []
        if research_persona:
            context_parts.append(f"INDUSTRY: {research_persona.default_industry}")
            context_parts.append(f"TARGET AUDIENCE: {research_persona.default_target_audience}")
            if research_persona.suggested_keywords:
                context_parts.append(f"TYPICAL TOPICS: {', '.join(research_persona.suggested_keywords[:5])}")
            if research_persona.research_angles:
                context_parts.append(f"RESEARCH ANGLES: {', '.join(research_persona.research_angles[:3])}")
        else:
            if industry:
                context_parts.append(f"INDUSTRY: {industry}")
            if target_audience:
                context_parts.append(f"TARGET AUDIENCE: {target_audience}")
        return "\n".join(context_parts)

    def _build_competitor_context(self, competitor_data: Optional[List[Dict]]) -> str:
        """Build competitor context section for prompts.

        Returns an empty string when no usable competitor data is present
        so the prompt omits the section entirely.
        """
        if not competitor_data:
            return ""

        competitor_names = []
        for comp in competitor_data[:5]:  # Limit to 5
            name = comp.get("name") or comp.get("domain") or comp.get("url", "Unknown")
            competitor_names.append(name)

        if competitor_names:
            return f"\nKNOWN COMPETITORS: {', '.join(competitor_names)}"
        return ""

    def _build_deliverables_instructions(self, expected_deliverables: List[str]) -> str:
        """Build specific extraction instructions for each expected deliverable.

        Unknown deliverable values are silently skipped (ValueError guard),
        so a malformed intent cannot break prompt construction.
        """
        instructions = ["### EXTRACTION INSTRUCTIONS\n"]
        instructions.append("For each requested deliverable, extract the following:\n")

        deliverable_instructions = {
            ExpectedDeliverable.KEY_STATISTICS: """
**STATISTICS**:
- Extract ALL relevant statistics with exact numbers
- Include source attribution (publication name, URL)
- Note the recency of the data
- Rate credibility based on source authority
- Format: statistic statement, value, context, source, URL, credibility score
""",
            ExpectedDeliverable.EXPERT_QUOTES: """
**EXPERT QUOTES**:
- Extract authoritative quotes from named experts
- Include speaker name, title, and organization
- Provide context for the quote
- Include source URL
""",
            ExpectedDeliverable.CASE_STUDIES: """
**CASE STUDIES**:
- Summarize each case study: challenge → solution → outcome
- Include key metrics and results
- Name the organization involved
- Provide source URL
""",
            ExpectedDeliverable.TRENDS: """
**TRENDS**:
- Identify current and emerging trends
- Note direction: growing, declining, emerging, or stable
- List supporting evidence
- Include timeline predictions if available
- Cite sources
""",
            ExpectedDeliverable.COMPARISONS: """
**COMPARISONS**:
- Build comparison tables where applicable
- Define clear comparison criteria
- List pros and cons for each option
- Provide a verdict/recommendation if data supports it
""",
            ExpectedDeliverable.BEST_PRACTICES: """
**BEST PRACTICES**:
- Extract recommended approaches
- Provide actionable guidelines
- Order by importance or sequence
""",
            ExpectedDeliverable.STEP_BY_STEP: """
**STEP BY STEP**:
- Extract process/how-to instructions
- Number steps clearly
- Include any prerequisites or requirements
""",
            ExpectedDeliverable.PROS_CONS: """
**PROS AND CONS**:
- List advantages (pros)
- List disadvantages (cons)
- Provide a balanced verdict
""",
            ExpectedDeliverable.DEFINITIONS: """
**DEFINITIONS**:
- Extract clear explanations of key terms and concepts
- Keep definitions concise but comprehensive
""",
            ExpectedDeliverable.EXAMPLES: """
**EXAMPLES**:
- Extract concrete examples that illustrate key points
- Include real-world applications
""",
            ExpectedDeliverable.PREDICTIONS: """
**PREDICTIONS**:
- Extract future outlook and predictions
- Note the source and their track record if known
- Include timeframes where mentioned
""",
            ExpectedDeliverable.CITATIONS: """
**CITATIONS**:
- List all authoritative sources with URLs
- Rate credibility and relevance
- Note content type (research, news, opinion, etc.)
""",
        }

        for deliverable in expected_deliverables:
            try:
                d_enum = ExpectedDeliverable(deliverable)
                if d_enum in deliverable_instructions:
                    instructions.append(deliverable_instructions[d_enum])
            except ValueError:
                # Unknown deliverable string - skip rather than crash.
                pass
        return "\n".join(instructions)

View File

@@ -0,0 +1,387 @@
"""
Intent Query Generator
Generates multiple targeted research queries based on user intent.
Each query targets a specific deliverable or question.
Author: ALwrity Team
Version: 1.0
"""
import json
from typing import Dict, Any, List, Optional
from loguru import logger
from models.research_intent_models import (
ResearchIntent,
ResearchQuery,
ExpectedDeliverable,
ResearchPurpose,
)
from models.research_persona_models import ResearchPersona
from .intent_prompt_builder import IntentPromptBuilder
class IntentQueryGenerator:
    """
    Generates targeted research queries based on user intent.

    Instead of a single generic search, generates multiple queries
    each targeting a specific deliverable or question. Falls back to
    template-based queries when AI generation fails.
    """

    def __init__(self):
        """Initialize the query generator."""
        self.prompt_builder = IntentPromptBuilder()
        logger.info("IntentQueryGenerator initialized")

    async def generate_queries(
        self,
        intent: ResearchIntent,
        research_persona: Optional[ResearchPersona] = None,
    ) -> Dict[str, Any]:
        """
        Generate targeted research queries based on intent.

        Args:
            intent: The inferred research intent
            research_persona: Optional persona for context

        Returns:
            Dict with queries, enhanced_keywords, and research_angles.
            On any failure, template-based fallback queries are returned
            instead of raising.
        """
        try:
            logger.info(f"Generating queries for: {intent.primary_question[:50]}...")

            # Build the query generation prompt
            prompt = self.prompt_builder.build_query_generation_prompt(
                intent=intent,
                research_persona=research_persona,
            )

            # Define the expected JSON schema for structured LLM output
            query_schema = {
                "type": "object",
                "properties": {
                    "queries": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "query": {"type": "string"},
                                "purpose": {"type": "string"},
                                "provider": {"type": "string"},
                                "priority": {"type": "integer"},
                                "expected_results": {"type": "string"}
                            },
                            "required": ["query", "purpose", "provider", "priority", "expected_results"]
                        }
                    },
                    "enhanced_keywords": {"type": "array", "items": {"type": "string"}},
                    "research_angles": {"type": "array", "items": {"type": "string"}}
                },
                "required": ["queries", "enhanced_keywords", "research_angles"]
            }

            # Call LLM for query generation (imported lazily to avoid a
            # circular import at module load time)
            from services.llm_providers.main_text_generation import llm_text_gen
            result = llm_text_gen(
                prompt=prompt,
                json_struct=query_schema,
                user_id=None
            )

            if isinstance(result, dict) and "error" in result:
                logger.error(f"Query generation failed: {result.get('error')}")
                return self._create_fallback_queries(intent)

            # Parse queries
            queries = self._parse_queries(result.get("queries", []))

            # Ensure we have queries for all expected deliverables
            queries = self._ensure_deliverable_coverage(queries, intent)

            # Sort by priority (highest first)
            queries.sort(key=lambda q: q.priority, reverse=True)

            logger.info(f"Generated {len(queries)} targeted queries")
            return {
                "queries": queries,
                "enhanced_keywords": result.get("enhanced_keywords", []),
                "research_angles": result.get("research_angles", []),
            }
        except Exception as e:
            logger.error(f"Error generating queries: {e}")
            return self._create_fallback_queries(intent)

    def _parse_queries(self, raw_queries: List[Dict]) -> List[ResearchQuery]:
        """Parse raw query data into ResearchQuery objects.

        Malformed entries are logged and skipped; blank query strings are
        rejected so we never send an empty search to a provider.
        """
        queries = []
        for q in raw_queries:
            try:
                # Skip queries with no usable text: an empty query would
                # waste a provider call and return noise.
                query_text = (q.get("query") or "").strip()
                if not query_text:
                    logger.warning("Skipping generated query with empty query text")
                    continue

                # Validate purpose; unknown values default to statistics
                purpose_str = q.get("purpose", "key_statistics")
                try:
                    purpose = ExpectedDeliverable(purpose_str)
                except ValueError:
                    purpose = ExpectedDeliverable.KEY_STATISTICS

                query = ResearchQuery(
                    query=query_text,
                    purpose=purpose,
                    provider=q.get("provider", "exa"),
                    priority=min(max(int(q.get("priority", 3)), 1), 5),  # Clamp 1-5
                    expected_results=q.get("expected_results", ""),
                )
                queries.append(query)
            except Exception as e:
                logger.warning(f"Failed to parse query: {e}")
                continue
        return queries

    def _ensure_deliverable_coverage(
        self,
        queries: List[ResearchQuery],
        intent: ResearchIntent,
    ) -> List[ResearchQuery]:
        """Ensure we have queries for all expected deliverables.

        Any deliverable the LLM did not target gets a template-based
        query appended.
        """
        # Get deliverables already covered
        covered = set(q.purpose.value for q in queries)

        # Check for missing deliverables
        for deliverable in intent.expected_deliverables:
            if deliverable not in covered:
                # Generate a query for this deliverable
                query = self._generate_query_for_deliverable(
                    deliverable=deliverable,
                    intent=intent,
                )
                queries.append(query)
        return queries

    def _generate_query_for_deliverable(
        self,
        deliverable: str,
        intent: ResearchIntent,
    ) -> ResearchQuery:
        """Generate a template-based query targeting a specific deliverable."""
        # Extract topic from primary question
        topic = intent.original_input

        # Query templates by deliverable type
        templates = {
            ExpectedDeliverable.KEY_STATISTICS.value: {
                "query": f"{topic} statistics data report study",
                "provider": "exa",
                "priority": 5,
                "expected": "Statistical data and research findings",
            },
            ExpectedDeliverable.EXPERT_QUOTES.value: {
                "query": f"{topic} expert opinion interview insights",
                "provider": "exa",
                "priority": 4,
                "expected": "Expert opinions and authoritative quotes",
            },
            ExpectedDeliverable.CASE_STUDIES.value: {
                "query": f"{topic} case study success story implementation example",
                "provider": "exa",
                "priority": 4,
                "expected": "Real-world case studies and examples",
            },
            ExpectedDeliverable.TRENDS.value: {
                "query": f"{topic} trends 2025 future predictions emerging",
                "provider": "tavily",
                "priority": 4,
                "expected": "Current trends and future predictions",
            },
            ExpectedDeliverable.COMPARISONS.value: {
                "query": f"{topic} comparison vs versus alternatives",
                "provider": "exa",
                "priority": 4,
                "expected": "Comparison and alternative options",
            },
            ExpectedDeliverable.BEST_PRACTICES.value: {
                "query": f"{topic} best practices recommendations guidelines",
                "provider": "exa",
                "priority": 3,
                "expected": "Best practices and recommendations",
            },
            ExpectedDeliverable.STEP_BY_STEP.value: {
                "query": f"{topic} how to guide tutorial steps",
                "provider": "exa",
                "priority": 3,
                "expected": "Step-by-step guides and tutorials",
            },
            ExpectedDeliverable.PROS_CONS.value: {
                "query": f"{topic} advantages disadvantages pros cons benefits",
                "provider": "exa",
                "priority": 3,
                "expected": "Pros, cons, and trade-offs",
            },
            ExpectedDeliverable.DEFINITIONS.value: {
                "query": f"what is {topic} definition explained",
                "provider": "exa",
                "priority": 3,
                "expected": "Clear definitions and explanations",
            },
            ExpectedDeliverable.EXAMPLES.value: {
                "query": f"{topic} examples real world applications",
                "provider": "exa",
                "priority": 3,
                "expected": "Real-world examples and applications",
            },
            ExpectedDeliverable.PREDICTIONS.value: {
                "query": f"{topic} future outlook predictions 2025 2030",
                "provider": "tavily",
                "priority": 4,
                "expected": "Future predictions and outlook",
            },
            ExpectedDeliverable.CITATIONS.value: {
                "query": f"{topic} research paper study academic",
                "provider": "exa",
                "priority": 4,
                "expected": "Authoritative academic sources",
            },
        }

        template = templates.get(deliverable, {
            "query": f"{topic}",
            "provider": "exa",
            "priority": 3,
            "expected": "General information",
        })

        # Guarded enum conversion (same idiom as _parse_queries): unknown
        # deliverable strings fall back to KEY_STATISTICS.
        try:
            purpose = ExpectedDeliverable(deliverable)
        except ValueError:
            purpose = ExpectedDeliverable.KEY_STATISTICS

        return ResearchQuery(
            query=template["query"],
            purpose=purpose,
            provider=template["provider"],
            priority=template["priority"],
            expected_results=template["expected"],
        )

    def _create_fallback_queries(self, intent: ResearchIntent) -> Dict[str, Any]:
        """Create template-based fallback queries when AI generation fails."""
        topic = intent.original_input

        # Generate basic queries for each expected deliverable
        queries = []
        for deliverable in intent.expected_deliverables[:5]:  # Limit to 5
            query = self._generate_query_for_deliverable(deliverable, intent)
            queries.append(query)

        # Add a general query if we have none
        if not queries:
            queries.append(ResearchQuery(
                query=topic,
                purpose=ExpectedDeliverable.KEY_STATISTICS,
                provider="exa",
                priority=5,
                expected_results="General information and insights",
            ))

        return {
            "queries": queries,
            "enhanced_keywords": topic.split()[:10],
            "research_angles": [
                f"Overview of {topic}",
                f"Latest trends in {topic}",
            ],
        }
class QueryOptimizer:
    """
    Optimizes queries for different research providers.

    Different providers have different strengths:
    - Exa: Semantic search, good for deep research
    - Tavily: Real-time search, good for news/trends
    - Google: Factual search, good for basic info
    """

    @staticmethod
    def optimize_for_exa(query: str, intent: ResearchIntent) -> Dict[str, Any]:
        """Build an Exa request payload tuned to the inferred intent."""
        wanted = intent.expected_deliverables
        wants_trends = ExpectedDeliverable.TRENDS.value in wanted

        # Category precedence: academic sources, then news, then companies.
        if ExpectedDeliverable.CITATIONS.value in wanted:
            category = "research paper"
        elif wants_trends:
            category = "news"
        elif intent.purpose == ResearchPurpose.COMPARE.value:
            category = "company"
        else:
            category = None

        # Neural search gives semantic understanding by default; "auto"
        # copes better with time-sensitive trend queries.
        search_type = "auto" if wants_trends else "neural"

        # Scale result count with the requested research depth.
        depth_to_count = {"expert": 20, "overview": 5}
        num_results = depth_to_count.get(intent.depth, 10)

        return {
            "query": query,
            "type": search_type,
            "category": category,
            "num_results": num_results,
            "text": True,
            "highlights": True,
        }

    @staticmethod
    def optimize_for_tavily(query: str, intent: ResearchIntent) -> Dict[str, Any]:
        """Build a Tavily request payload tuned to the inferred intent."""
        wanted = intent.expected_deliverables
        wants_trends = ExpectedDeliverable.TRENDS.value in wanted

        # Trend-focused research maps to Tavily's "news" topic.
        topic = "news" if wants_trends else "general"

        # Deeper research warrants the (more expensive) advanced search.
        search_depth = "advanced" if intent.depth in ("detailed", "expert") else "basic"

        # AI-generated answers only help for definition/statistic lookups.
        if ExpectedDeliverable.DEFINITIONS.value in wanted:
            include_answer = "advanced"
        elif ExpectedDeliverable.KEY_STATISTICS.value in wanted:
            include_answer = "basic"
        else:
            include_answer = False

        # Narrow the time window for time-sensitive or trend research.
        if intent.time_sensitivity == "real_time":
            time_range = "day"
        elif intent.time_sensitivity == "recent":
            time_range = "week"
        elif wants_trends:
            time_range = "month"
        else:
            time_range = None

        return {
            "query": query,
            "topic": topic,
            "search_depth": search_depth,
            "include_answer": include_answer,
            "time_range": time_range,
            "max_results": 10,
        }

View File

@@ -0,0 +1,378 @@
"""
Research Intent Inference Service
Analyzes user input to understand their research intent.
Uses AI to infer:
- What the user wants to accomplish
- What questions need answering
- What deliverables they expect
Author: ALwrity Team
Version: 1.0
"""
import json
from typing import Dict, Any, List, Optional
from loguru import logger
from models.research_intent_models import (
ResearchIntent,
ResearchPurpose,
ContentOutput,
ExpectedDeliverable,
ResearchDepthLevel,
InputType,
IntentInferenceRequest,
IntentInferenceResponse,
ResearchQuery,
)
from models.research_persona_models import ResearchPersona
from .intent_prompt_builder import IntentPromptBuilder
class ResearchIntentInference:
    """
    Infers user research intent from minimal input.

    Instead of asking a formal questionnaire, this service
    uses AI to understand what the user really wants.
    """

    def __init__(self):
        """Initialize the intent inference service."""
        # Builds the LLM prompt that drives intent inference.
        self.prompt_builder = IntentPromptBuilder()
        logger.info("ResearchIntentInference initialized")

    async def infer_intent(
        self,
        user_input: str,
        keywords: Optional[List[str]] = None,
        research_persona: Optional[ResearchPersona] = None,
        competitor_data: Optional[List[Dict]] = None,
        industry: Optional[str] = None,
        target_audience: Optional[str] = None,
    ) -> IntentInferenceResponse:
        """
        Analyze user input and infer their research intent.

        Args:
            user_input: User's keywords, question, or goal
            keywords: Extracted keywords (optional)
            research_persona: User's research persona (optional)
            competitor_data: Competitor analysis data (optional)
            industry: Industry context (optional)
            target_audience: Target audience context (optional)

        Returns:
            IntentInferenceResponse with inferred intent and suggested queries.
            On any failure this degrades to a low-confidence fallback response
            rather than raising.
        """
        try:
            logger.info(f"Inferring intent for: {user_input[:100]}...")
            keywords = keywords or []
            # Build the inference prompt
            prompt = self.prompt_builder.build_intent_inference_prompt(
                user_input=user_input,
                keywords=keywords,
                research_persona=research_persona,
                competitor_data=competitor_data,
                industry=industry,
                target_audience=target_audience,
            )
            # Define the expected JSON schema the LLM must return. Enum-like
            # string fields are validated later in _parse_intent_result, so
            # only input_type/depth are constrained here.
            intent_schema = {
                "type": "object",
                "properties": {
                    "input_type": {"type": "string", "enum": ["keywords", "question", "goal", "mixed"]},
                    "primary_question": {"type": "string"},
                    "secondary_questions": {"type": "array", "items": {"type": "string"}},
                    "purpose": {"type": "string"},
                    "content_output": {"type": "string"},
                    "expected_deliverables": {"type": "array", "items": {"type": "string"}},
                    "depth": {"type": "string", "enum": ["overview", "detailed", "expert"]},
                    "focus_areas": {"type": "array", "items": {"type": "string"}},
                    "perspective": {"type": "string"},
                    "time_sensitivity": {"type": "string"},
                    "confidence": {"type": "number"},
                    "needs_clarification": {"type": "boolean"},
                    "clarifying_questions": {"type": "array", "items": {"type": "string"}},
                    "analysis_summary": {"type": "string"}
                },
                "required": [
                    "input_type", "primary_question", "purpose", "content_output",
                    "expected_deliverables", "depth", "confidence", "analysis_summary"
                ]
            }
            # Call LLM for intent inference.
            # Imported locally, presumably to avoid a circular import — TODO confirm.
            from services.llm_providers.main_text_generation import llm_text_gen
            # NOTE(review): llm_text_gen is called synchronously inside an
            # async method — confirm it is non-blocking or offload to a thread.
            result = llm_text_gen(
                prompt=prompt,
                json_struct=intent_schema,
                user_id=None
            )
            # llm_text_gen signals failure via an "error" key in its dict result.
            if isinstance(result, dict) and "error" in result:
                logger.error(f"Intent inference failed: {result.get('error')}")
                return self._create_fallback_response(user_input, keywords)
            # Parse and validate the result
            intent = self._parse_intent_result(result, user_input)
            # Generate quick options for UI
            quick_options = self._generate_quick_options(intent, result)
            # Create response
            response = IntentInferenceResponse(
                success=True,
                intent=intent,
                analysis_summary=result.get("analysis_summary", "Research intent analyzed"),
                suggested_queries=[],  # Will be populated by query generator
                suggested_keywords=self._extract_keywords_from_input(user_input, keywords),
                suggested_angles=result.get("focus_areas", []),
                quick_options=quick_options,
            )
            logger.info(f"Intent inferred: purpose={intent.purpose}, confidence={intent.confidence}")
            return response
        except Exception as e:
            # Never propagate: degrade to the low-confidence fallback intent.
            logger.error(f"Error inferring intent: {e}")
            return self._create_fallback_response(user_input, keywords or [])

    def _parse_intent_result(self, result: Dict[str, Any], user_input: str) -> ResearchIntent:
        """Parse LLM result into ResearchIntent model.

        Invalid enum strings are coerced to safe defaults instead of raising,
        and an empty deliverables list is back-filled from the purpose.
        """
        # Map string values to enums safely
        input_type = self._safe_enum(InputType, result.get("input_type", "keywords"), InputType.KEYWORDS)
        purpose = self._safe_enum(ResearchPurpose, result.get("purpose", "learn"), ResearchPurpose.LEARN)
        content_output = self._safe_enum(ContentOutput, result.get("content_output", "general"), ContentOutput.GENERAL)
        depth = self._safe_enum(ResearchDepthLevel, result.get("depth", "detailed"), ResearchDepthLevel.DETAILED)
        # Parse expected deliverables
        raw_deliverables = result.get("expected_deliverables", [])
        expected_deliverables = []
        for d in raw_deliverables:
            try:
                expected_deliverables.append(ExpectedDeliverable(d).value)
            except ValueError:
                # Skip invalid deliverables
                pass
        # Ensure we have at least some deliverables
        if not expected_deliverables:
            expected_deliverables = self._infer_deliverables_from_purpose(purpose)
        return ResearchIntent(
            primary_question=result.get("primary_question", user_input),
            secondary_questions=result.get("secondary_questions", []),
            purpose=purpose.value,
            content_output=content_output.value,
            expected_deliverables=expected_deliverables,
            depth=depth.value,
            focus_areas=result.get("focus_areas", []),
            perspective=result.get("perspective"),
            time_sensitivity=result.get("time_sensitivity"),
            input_type=input_type.value,
            original_input=user_input,
            confidence=float(result.get("confidence", 0.7)),
            needs_clarification=result.get("needs_clarification", False),
            clarifying_questions=result.get("clarifying_questions", []),
        )

    def _safe_enum(self, enum_class, value: str, default):
        """Safely convert string to enum, returning default if invalid."""
        try:
            return enum_class(value)
        except ValueError:
            return default

    def _infer_deliverables_from_purpose(self, purpose: ResearchPurpose) -> List[str]:
        """Infer expected deliverables based on research purpose.

        Static mapping used when the LLM returns no (valid) deliverables;
        unknown purposes fall back to key statistics only.
        """
        purpose_deliverables = {
            ResearchPurpose.LEARN: [
                ExpectedDeliverable.DEFINITIONS.value,
                ExpectedDeliverable.EXAMPLES.value,
                ExpectedDeliverable.KEY_STATISTICS.value,
            ],
            ResearchPurpose.CREATE_CONTENT: [
                ExpectedDeliverable.KEY_STATISTICS.value,
                ExpectedDeliverable.EXPERT_QUOTES.value,
                ExpectedDeliverable.EXAMPLES.value,
                ExpectedDeliverable.CASE_STUDIES.value,
            ],
            ResearchPurpose.MAKE_DECISION: [
                ExpectedDeliverable.PROS_CONS.value,
                ExpectedDeliverable.COMPARISONS.value,
                ExpectedDeliverable.BEST_PRACTICES.value,
            ],
            ResearchPurpose.COMPARE: [
                ExpectedDeliverable.COMPARISONS.value,
                ExpectedDeliverable.PROS_CONS.value,
                ExpectedDeliverable.KEY_STATISTICS.value,
            ],
            ResearchPurpose.SOLVE_PROBLEM: [
                ExpectedDeliverable.STEP_BY_STEP.value,
                ExpectedDeliverable.BEST_PRACTICES.value,
                ExpectedDeliverable.CASE_STUDIES.value,
            ],
            ResearchPurpose.FIND_DATA: [
                ExpectedDeliverable.KEY_STATISTICS.value,
                ExpectedDeliverable.CITATIONS.value,
            ],
            ResearchPurpose.EXPLORE_TRENDS: [
                ExpectedDeliverable.TRENDS.value,
                ExpectedDeliverable.PREDICTIONS.value,
                ExpectedDeliverable.KEY_STATISTICS.value,
            ],
            ResearchPurpose.VALIDATE: [
                ExpectedDeliverable.CITATIONS.value,
                ExpectedDeliverable.KEY_STATISTICS.value,
                ExpectedDeliverable.EXPERT_QUOTES.value,
            ],
            ResearchPurpose.GENERATE_IDEAS: [
                ExpectedDeliverable.EXAMPLES.value,
                ExpectedDeliverable.TRENDS.value,
                ExpectedDeliverable.CASE_STUDIES.value,
            ],
        }
        return purpose_deliverables.get(purpose, [ExpectedDeliverable.KEY_STATISTICS.value])

    def _generate_quick_options(self, intent: ResearchIntent, result: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Generate quick options for UI confirmation.

        Each option is a dict with id/label/value/display/alternatives/
        confidence keys consumed by the frontend.
        """
        options = []
        # Purpose option
        options.append({
            "id": "purpose",
            "label": "Research Purpose",
            "value": intent.purpose,
            "display": self._purpose_display(intent.purpose),
            "alternatives": [p.value for p in ResearchPurpose],
            "confidence": result.get("confidence", 0.7),
        })
        # Content output option — only shown when a specific type was inferred.
        if intent.content_output != ContentOutput.GENERAL.value:
            options.append({
                "id": "content_output",
                "label": "Content Type",
                "value": intent.content_output,
                "display": intent.content_output.replace("_", " ").title(),
                "alternatives": [c.value for c in ContentOutput],
                "confidence": result.get("confidence", 0.7),
            })
        # Deliverables option (multi-select; display capped at four items).
        options.append({
            "id": "deliverables",
            "label": "What I'll Find",
            "value": intent.expected_deliverables,
            "display": [d.replace("_", " ").title() for d in intent.expected_deliverables[:4]],
            "alternatives": [d.value for d in ExpectedDeliverable],
            "confidence": result.get("confidence", 0.7),
            "multi_select": True,
        })
        # Depth option
        options.append({
            "id": "depth",
            "label": "Research Depth",
            "value": intent.depth,
            "display": intent.depth.title(),
            "alternatives": [d.value for d in ResearchDepthLevel],
            "confidence": result.get("confidence", 0.7),
        })
        return options

    def _purpose_display(self, purpose: str) -> str:
        """Get display-friendly purpose text."""
        display_map = {
            "learn": "Understand this topic",
            "create_content": "Create content about this",
            "make_decision": "Make a decision",
            "compare": "Compare options",
            "solve_problem": "Solve a problem",
            "find_data": "Find specific data",
            "explore_trends": "Explore trends",
            "validate": "Validate information",
            "generate_ideas": "Generate ideas",
        }
        # Unknown purposes get a generic "Snake Case -> Title Case" rendering.
        return display_map.get(purpose, purpose.replace("_", " ").title())

    def _extract_keywords_from_input(self, user_input: str, keywords: List[str]) -> List[str]:
        """Extract and enhance keywords from user input.

        Keeps the caller-provided keywords first, then appends non-stop-word
        tokens (length > 2) from the input, capped at 15 total.
        """
        # Start with provided keywords
        extracted = list(keywords) if keywords else []
        # Simple extraction from input (split on common delimiters)
        words = user_input.lower().replace(",", " ").replace(";", " ").split()
        # Filter out common words
        stop_words = {
            "the", "a", "an", "is", "are", "was", "were", "be", "been", "being",
            "have", "has", "had", "do", "does", "did", "will", "would", "could",
            "should", "may", "might", "must", "shall", "can", "need", "dare",
            "to", "of", "in", "for", "on", "with", "at", "by", "from", "up",
            "about", "into", "through", "during", "before", "after", "above",
            "below", "between", "under", "again", "further", "then", "once",
            "here", "there", "when", "where", "why", "how", "all", "each",
            "few", "more", "most", "other", "some", "such", "no", "nor", "not",
            "only", "own", "same", "so", "than", "too", "very", "just", "and",
            "but", "if", "or", "because", "as", "until", "while", "i", "we",
            "you", "they", "what", "which", "who", "whom", "this", "that",
            "these", "those", "am", "want", "write", "blog", "post", "article",
        }
        for word in words:
            if word not in stop_words and len(word) > 2 and word not in extracted:
                extracted.append(word)
        return extracted[:15]  # Limit to 15 keywords

    def _create_fallback_response(self, user_input: str, keywords: List[str]) -> IntentInferenceResponse:
        """Create a fallback response when AI inference fails.

        Produces a generic "learn" intent at 0.5 confidence and flags that
        clarification is needed, so the UI can ask follow-up questions.
        """
        # Create a basic intent from the input
        fallback_intent = ResearchIntent(
            primary_question=f"What are the key insights about: {user_input}?",
            secondary_questions=[
                f"What are the latest trends in {user_input}?",
                f"What are best practices for {user_input}?",
            ],
            purpose=ResearchPurpose.LEARN.value,
            content_output=ContentOutput.GENERAL.value,
            expected_deliverables=[
                ExpectedDeliverable.KEY_STATISTICS.value,
                ExpectedDeliverable.EXAMPLES.value,
                ExpectedDeliverable.BEST_PRACTICES.value,
            ],
            depth=ResearchDepthLevel.DETAILED.value,
            focus_areas=[],
            input_type=InputType.KEYWORDS.value,
            original_input=user_input,
            confidence=0.5,
            needs_clarification=True,
            clarifying_questions=[
                "What type of content are you creating?",
                "What specific aspects are you most interested in?",
            ],
        )
        return IntentInferenceResponse(
            success=True,  # Still return success, just with lower confidence
            intent=fallback_intent,
            analysis_summary=f"Basic research analysis for: {user_input}",
            suggested_queries=[],
            suggested_keywords=keywords,
            suggested_angles=[],
            quick_options=[],
        )

View File

@@ -0,0 +1,660 @@
"""
Research Persona Prompt Builder
Handles building comprehensive prompts for research persona generation.
Generates personalized research defaults, suggestions, and configurations.
"""
from typing import Dict, Any, List
import json
from loguru import logger
class ResearchPersonaPromptBuilder:
"""Builds comprehensive prompts for research persona generation."""
def build_research_persona_prompt(self, onboarding_data: Dict[str, Any]) -> str:
"""Build the research persona generation prompt with comprehensive data."""
# Extract data from onboarding_data
website_analysis = onboarding_data.get("website_analysis", {}) or {}
persona_data = onboarding_data.get("persona_data", {}) or {}
research_prefs = onboarding_data.get("research_preferences", {}) or {}
business_info = onboarding_data.get("business_info", {}) or {}
competitor_analysis = onboarding_data.get("competitor_analysis", []) or []
# Extract core persona - handle both camelCase and snake_case
core_persona = persona_data.get("corePersona") or persona_data.get("core_persona") or {}
# Phase 1: Extract key website analysis fields for enhanced personalization
writing_style = website_analysis.get("writing_style", {}) or {}
content_type = website_analysis.get("content_type", {}) or {}
crawl_result = website_analysis.get("crawl_result", {}) or {}
# Phase 2: Extract additional fields for pattern-based personalization
style_patterns = website_analysis.get("style_patterns", {}) or {}
content_characteristics = website_analysis.get("content_characteristics", {}) or {}
style_guidelines = website_analysis.get("style_guidelines", {}) or {}
# Extract topics/keywords from crawl_result (if available)
extracted_topics = self._extract_topics_from_crawl(crawl_result)
extracted_keywords = self._extract_keywords_from_crawl(crawl_result)
# Phase 2: Extract patterns and vocabulary level
extracted_patterns = self._extract_writing_patterns(style_patterns)
vocabulary_level = content_characteristics.get("vocabulary_level", "medium") if content_characteristics else "medium"
extracted_guidelines = self._extract_style_guidelines(style_guidelines)
# Phase 3: Full crawl analysis and comprehensive mapping
crawl_analysis = self._analyze_crawl_result_comprehensive(crawl_result)
writing_style_mapping = self._map_writing_style_comprehensive(writing_style, content_characteristics)
content_themes = self._extract_content_themes(crawl_result, extracted_topics)
prompt = f"""
COMPREHENSIVE RESEARCH PERSONA GENERATION TASK: Create a highly detailed, personalized research persona based on the user's business, writing style, and content strategy. This persona will provide intelligent defaults and suggestions for research inputs.
=== USER CONTEXT ===
BUSINESS INFORMATION:
{json.dumps(business_info, indent=2)}
WEBSITE ANALYSIS:
{json.dumps(website_analysis, indent=2)}
CORE PERSONA:
{json.dumps(core_persona, indent=2)}
RESEARCH PREFERENCES:
{json.dumps(research_prefs, indent=2)}
COMPETITOR ANALYSIS:
{json.dumps(competitor_analysis, indent=2) if competitor_analysis else "No competitor data available"}
=== PHASE 1: WEBSITE ANALYSIS INTELLIGENCE ===
WRITING STYLE (for research depth mapping):
{json.dumps(writing_style, indent=2) if writing_style else "Not available"}
CONTENT TYPE (for preset generation):
{json.dumps(content_type, indent=2) if content_type else "Not available"}
EXTRACTED TOPICS FROM WEBSITE CONTENT:
{json.dumps(extracted_topics, indent=2) if extracted_topics else "No topics extracted"}
EXTRACTED KEYWORDS FROM WEBSITE CONTENT:
{json.dumps(extracted_keywords[:20], indent=2) if extracted_keywords else "No keywords extracted"}
=== PHASE 2: WRITING PATTERNS & STYLE INTELLIGENCE ===
STYLE PATTERNS (for research angles):
{json.dumps(style_patterns, indent=2) if style_patterns else "Not available"}
EXTRACTED WRITING PATTERNS:
{json.dumps(extracted_patterns, indent=2) if extracted_patterns else "No patterns extracted"}
CONTENT CHARACTERISTICS (for keyword sophistication):
{json.dumps(content_characteristics, indent=2) if content_characteristics else "Not available"}
VOCABULARY LEVEL:
{vocabulary_level}
STYLE GUIDELINES (for query enhancement):
{json.dumps(style_guidelines, indent=2) if style_guidelines else "Not available"}
EXTRACTED GUIDELINES:
{json.dumps(extracted_guidelines, indent=2) if extracted_guidelines else "No guidelines extracted"}
=== PHASE 3: COMPREHENSIVE ANALYSIS & MAPPING ===
CRAWL ANALYSIS (Full Content Intelligence):
{json.dumps(crawl_analysis, indent=2) if crawl_analysis else "No crawl analysis available"}
WRITING STYLE COMPREHENSIVE MAPPING:
{json.dumps(writing_style_mapping, indent=2) if writing_style_mapping else "No style mapping available"}
CONTENT THEMES (Extracted from Website):
{json.dumps(content_themes, indent=2) if content_themes else "No themes extracted"}
=== RESEARCH PERSONA GENERATION REQUIREMENTS ===
Generate a comprehensive research persona in JSON format with the following structure:
1. DEFAULT VALUES:
- "default_industry": Extract from core_persona.industry, business_info.industry, or website_analysis target_audience. If none available, infer from content patterns in website_analysis or research_preferences. Never use "General" - always provide a specific industry based on context.
- "default_target_audience": Extract from core_persona.target_audience, website_analysis.target_audience, or business_info.target_audience. Be specific and descriptive.
- "default_research_mode": **PHASE 3 ENHANCEMENT** - Use comprehensive writing_style_mapping:
* **PRIMARY**: Use writing_style_mapping.research_depth_preference (from comprehensive analysis)
* **SECONDARY**: Map from writing_style.complexity:
- If writing_style.complexity == "high": Use "comprehensive" (deep research needed)
- If writing_style.complexity == "medium": Use "targeted" (balanced research)
- If writing_style.complexity == "low": Use "basic" (quick research)
* **FALLBACK**: Use research_preferences.research_depth if complexity not available
* This ensures research depth matches the user's writing sophistication level and comprehensive style analysis
- "default_provider": **PHASE 3 ENHANCEMENT** - Use writing_style_mapping.provider_preference:
* **PRIMARY**: Use writing_style_mapping.provider_preference (from comprehensive style analysis)
* **SECONDARY**: Suggest based on user's typical research needs:
- Academic/research users: "exa" (semantic search, papers)
- News/current events users: "tavily" (real-time, AI answers)
- General business users: "exa" (better for content creation)
* **DEFAULT**: "exa" (generally better for content creators)
2. KEYWORD INTELLIGENCE:
- "suggested_keywords": **PHASE 1 ENHANCEMENT** - Prioritize extracted keywords from crawl_result:
* First, use extracted_keywords from website content (top 8-10 most relevant)
* Then, supplement with keywords from user's industry, interests (from core_persona), and content goals
* Total: 8-12 keywords, with at least 50% from extracted_keywords if available
* This ensures keywords reflect the user's actual content topics
- "keyword_expansion_patterns": **PHASE 2 ENHANCEMENT** - Create a dictionary mapping common keywords to expanded, industry-specific terms based on vocabulary_level:
* If vocabulary_level == "advanced": Use sophisticated, technical, industry-specific terminology
Example: {{"AI": ["machine learning algorithms", "neural network architectures", "deep learning frameworks", "algorithmic intelligence systems"], "tools": ["enterprise software platforms", "integrated development environments", "cloud-native solutions"]}}
* If vocabulary_level == "medium": Use balanced, professional terminology
Example: {{"AI": ["artificial intelligence", "automated systems", "smart technology", "intelligent automation"], "tools": ["software solutions", "digital platforms", "business applications"]}}
* If vocabulary_level == "simple": Use accessible, beginner-friendly terminology
Example: {{"AI": ["smart technology", "automated tools", "helpful software", "intelligent helpers"], "tools": ["apps", "software", "platforms", "online services"]}}
* Include 10-15 patterns, matching the user's vocabulary sophistication level
* Focus on industry-specific terminology from the user's domain, but at the appropriate complexity level
3. PROVIDER-SPECIFIC OPTIMIZATION:
- "suggested_exa_domains": List 4-6 authoritative domains for the user's industry (e.g., Healthcare: ["pubmed.gov", "nejm.org", "thelancet.com"]).
- "suggested_exa_category": Suggest appropriate Exa category based on industry:
- Healthcare/Science: "research paper"
- Finance: "financial report"
- Technology/Business: "company" or "news"
- Social Media/Marketing: "tweet" or "linkedin profile"
- Default: null (empty string for all categories)
- "suggested_exa_search_type": Suggest Exa search algorithm:
- Academic/research content: "neural" (semantic understanding)
- Current news/trends: "fast" (speed optimized)
- General research: "auto" (balanced)
- Code/technical: "neural"
- "suggested_tavily_topic": Choose based on content type:
- Financial content: "finance"
- News/current events: "news"
- General research: "general"
- "suggested_tavily_search_depth": Choose based on research needs:
- Quick overview: "basic" (1 credit, faster)
- In-depth analysis: "advanced" (2 credits, more comprehensive)
- Breaking news: "fast" (speed optimized)
- "suggested_tavily_include_answer": AI-generated answers:
- For factual queries needing quick answers: "advanced"
- For research summaries: "basic"
- When building custom content: "false" (use raw results)
- "suggested_tavily_time_range": Time filtering:
- Breaking news: "day"
- Recent developments: "week"
- Industry analysis: "month"
- Historical research: null (no time limit)
- "suggested_tavily_raw_content_format": Raw content for LLM processing:
- For blog content creation: "markdown" (structured)
- For simple text extraction: "text"
- No raw content needed: "false"
- "provider_recommendations": Map use cases to best providers:
{{"trends": "tavily", "deep_research": "exa", "factual": "google", "news": "tavily", "academic": "exa"}}
4. RESEARCH ANGLES:
- "research_angles": **PHASE 2 ENHANCEMENT** - Generate 5-8 alternative research angles/focuses based on:
* **PRIMARY SOURCE**: Extract from extracted_patterns (writing patterns from style_patterns):
- If "comparison" in patterns: "Compare {{topic}} solutions and alternatives"
- If "how-to" or "tutorial" in patterns: "Step-by-step guide to {{topic}} implementation"
- If "case-study" or "case_study" in patterns: "Real-world {{topic}} case studies and success stories"
- If "trend-analysis" or "trends" in patterns: "Latest {{topic}} trends and future predictions"
- If "best-practices" or "best_practices" in patterns: "{{topic}} best practices and industry standards"
- If "review" or "evaluation" in patterns: "{{topic}} review and evaluation criteria"
- If "problem-solving" in patterns: "{{topic}} problem-solving strategies and solutions"
* **SECONDARY SOURCES** (if patterns not available):
- User's pain points and challenges (from core_persona.identity or core_persona)
- Industry trends and opportunities (from website_analysis or business_info)
- Content goals (from research_preferences.content_types)
- Audience interests (from core_persona or website_analysis.target_audience)
- Competitive landscape (if competitor_analysis exists, include competitive angles)
* Make angles specific to the user's industry and actionable for content creation
* Use the same language style and structure as the user's writing patterns
5. QUERY ENHANCEMENT:
- "query_enhancement_rules": **PHASE 2 ENHANCEMENT** - Create templates for improving vague user queries based on extracted_guidelines:
* **PRIMARY SOURCE**: Use extracted_guidelines (from style_guidelines) to create enhancement rules:
- If guidelines include "Use specific examples": {{"vague_query": "Research: {{query}} with specific examples and case studies"}}
- If guidelines include "Include data points" or "statistics": {{"general_query": "Research: {{query}} including statistics, metrics, and data analysis"}}
- If guidelines include "Reference industry standards": {{"basic_query": "Research: {{query}} with industry benchmarks and best practices"}}
- If guidelines include "Cite authoritative sources": {{"factual_query": "Research: {{query}} from authoritative sources and expert opinions"}}
- If guidelines include "Provide actionable insights": {{"theoretical_query": "Research: {{query}} with actionable strategies and implementation steps"}}
- If guidelines include "Compare alternatives": {{"single_item_query": "Research: Compare {{query}} alternatives and evaluate options"}}
* **FALLBACK PATTERNS** (if guidelines not available):
{{"vague_ai": "Research: AI applications in {{industry}} for {{audience}}", "vague_tools": "Compare top {{industry}} tools", "vague_trends": "Research latest {{industry}} trends and developments", ...}}
* Include 5-8 enhancement patterns
* Match the enhancement style to the user's writing guidelines and preferences
6. RECOMMENDED PRESETS:
- "recommended_presets": **PHASE 3 ENHANCEMENT** - Generate 3-5 personalized research preset templates using comprehensive analysis:
* **USE CONTENT THEMES**: If content_themes available, create at least one preset per major theme (up to 3 themes)
- Example: If themes include ["AI automation", "content marketing", "SEO strategies"], create presets for each
- Use theme names in preset keywords: "Research latest {theme} trends and best practices"
* **USE CRAWL ANALYSIS**: Leverage crawl_analysis.content_categories and crawl_analysis.main_topics for preset generation
- Create presets that match the user's actual website content categories
- Use main_topics for preset keywords and descriptions
* **CONTENT TYPE BASED**: Generate presets based on content_type (from Phase 1):
* **Content-Type-Specific Presets**: Use content_type.primary_type and content_type.secondary_types to create presets:
- If primary_type == "blog": Create "Blog Topic Research" preset with trending topics
- If primary_type == "article": Create "Article Research" preset with in-depth analysis
- If primary_type == "case_study": Create "Case Study Research" preset with real-world examples
- If primary_type == "tutorial": Create "Tutorial Research" preset with step-by-step guides
- If "tutorial" in secondary_types: Add "How-To Guide Research" preset
- If "comparison" in secondary_types or style_patterns: Add "Comparison Research" preset
- If content_type.purpose == "thought_leadership": Create "Thought Leadership Research" with expert insights
- If content_type.purpose == "education": Create "Educational Content Research" preset
* **Use Extracted Topics**: If extracted_topics available, create at least one preset using actual website topics:
- "Latest {extracted_topic} Trends" preset
- "{extracted_topic} Best Practices" preset
* Each preset should include:
- name: Descriptive, action-oriented name that clearly indicates what research will be done
* Use research_angles as inspiration for preset names (e.g., "Compare {Industry} Tools", "{Industry} ROI Analysis")
* If competitor_analysis exists, create at least one competitive analysis preset (e.g., "Competitive Landscape Analysis")
* Make names specific and actionable, not generic
* **NEW**: Include content type in name when relevant (e.g., "Blog: {Industry} Trends", "Tutorial: {Topic} Guide")
- keywords: Research query string that is:
* **NEW**: Use extracted_topics and extracted_keywords when available for more relevant queries
* Specific and detailed (not vague like "AI tools")
* Industry-focused (includes industry context)
* Audience-aware (considers target audience needs)
* Actionable (user can immediately understand what research will provide)
* Examples: "Research latest AI-powered marketing automation platforms for B2B SaaS companies" (GOOD)
* Avoid: "AI tools" or "marketing research" (TOO VAGUE)
- industry: User's industry (from business_info or inferred)
- target_audience: User's target audience (from business_info or inferred)
- research_mode: "basic", "comprehensive", or "targeted" based on:
* **NEW**: Also consider content_type.purpose:
- "thought_leadership""comprehensive" (needs deep research)
- "education""comprehensive" (needs thorough coverage)
- "marketing""targeted" (needs specific insights)
- "entertainment""basic" (needs quick facts)
* "comprehensive" for deep analysis, trends, competitive research
* "targeted" for specific questions, quick insights
* "basic" for simple fact-finding
- config: Complete ResearchConfig object with:
* provider: Use suggested_exa_category to determine if "exa" or "tavily" is better
* exa_category: Use suggested_exa_category if available
* exa_include_domains: Use suggested_exa_domains if available (limit to 3-5 most relevant)
* exa_search_type: Use suggested_exa_search_type if available
* max_sources: 15-25 for comprehensive, 10-15 for targeted, 8-12 for basic
* include_competitors: true if competitor_analysis exists and preset is about competitive research
* include_trends: true for trend-focused presets
* include_statistics: true for data-driven research
* include_expert_quotes: true for comprehensive research or thought_leadership content
- description: Brief (1-2 sentences) explaining what this preset researches and why it's valuable
- icon: Optional emoji that represents the preset (e.g., "📊" for trends, "🎯" for targeted, "🔍" for analysis, "📝" for blog, "📚" for tutorial)
- gradient: Optional CSS gradient for visual appeal
PRESET GENERATION GUIDELINES:
- **PHASE 1 PRIORITY**: Create presets that match the user's actual content types (from content_type)
- Use extracted_topics to create presets based on actual website content
- Create presets that the user would actually want to use for their content creation
- Use research_angles to inspire preset names and keywords
- If competitor_analysis has data, create at least one competitive analysis preset
- Make each preset unique with different research focus (trends, tools, best practices, competitive, etc.)
- Ensure keywords are detailed enough to generate meaningful research
- Vary research_mode across presets to offer different depth levels
- Use industry-specific terminology in preset names and keywords
7. RESEARCH PREFERENCES:
- "research_preferences": Extract and structure research preferences from onboarding:
- research_depth: From research_preferences.research_depth
- content_types: From research_preferences.content_types
- auto_research: From research_preferences.auto_research
- factual_content: From research_preferences.factual_content
=== OUTPUT REQUIREMENTS ===
Return a valid JSON object matching this exact structure:
{{
"default_industry": "string",
"default_target_audience": "string",
"default_research_mode": "basic" | "comprehensive" | "targeted",
"default_provider": "google" | "exa",
"suggested_keywords": ["keyword1", "keyword2", ...],
"keyword_expansion_patterns": {{
"keyword": ["expansion1", "expansion2", ...]
}},
"suggested_exa_domains": ["domain1.com", "domain2.com", ...],
"suggested_exa_category": "string or null",
"suggested_exa_search_type": "auto | neural | keyword | fast | deep",
"suggested_tavily_topic": "general | news | finance",
"suggested_tavily_search_depth": "basic | advanced | fast | ultra-fast",
"suggested_tavily_include_answer": "false | basic | advanced",
"suggested_tavily_time_range": "day | week | month | year or null",
"suggested_tavily_raw_content_format": "false | markdown | text",
"provider_recommendations": {{
"trends": "tavily",
"deep_research": "exa",
"factual": "google"
}},
"research_angles": ["angle1", "angle2", ...],
"query_enhancement_rules": {{
"pattern": "template"
}},
"recommended_presets": [
{{
"name": "string",
"keywords": "string",
"industry": "string",
"target_audience": "string",
"research_mode": "basic" | "comprehensive" | "targeted",
"config": {{
"mode": "basic" | "comprehensive" | "targeted",
"provider": "google" | "exa",
"max_sources": 10 | 15 | 12,
"include_statistics": true | false,
"include_expert_quotes": true | false,
"include_competitors": true | false,
"include_trends": true | false,
"exa_category": "string or null",
"exa_include_domains": ["domain1.com", ...],
"exa_search_type": "auto" | "keyword" | "neural"
}},
"description": "string"
}}
],
"research_preferences": {{
"research_depth": "string",
"content_types": ["type1", "type2", ...],
"auto_research": true | false,
"factual_content": true | false
}},
"version": "1.0",
"confidence_score": 85.0
}}
=== IMPORTANT INSTRUCTIONS ===
1. Be highly specific and personalized - use actual data from the user's business, persona, and preferences.
2. NEVER use "General" for industry or target_audience - always infer or create specific categories based on available context.
3. For minimal data scenarios:
- If industry is unclear, infer from research_preferences.content_types or website_analysis.content_characteristics
- If target_audience is unclear, infer from writing_style patterns or content goals
- Use business_info to fill gaps when persona_data is incomplete
4. Generate industry-specific intelligence even with limited data:
- For content creators: assume "Content Marketing" or "Digital Publishing"
- For business users: assume "Business Consulting" or "Professional Services"
- For technical users: assume "Technology" or "Software Development"
5. Ensure all suggested keywords, domains, and angles are relevant to the user's industry and audience.
6. Generate realistic, actionable presets that the user would actually want to use.
7. Confidence score should reflect data richness (0-100): higher if rich onboarding data, lower if minimal data.
8. Return ONLY valid JSON - no markdown formatting, no explanatory text.
Generate the research persona now:
"""
return prompt
def _extract_topics_from_crawl(self, crawl_result: Dict[str, Any]) -> List[str]:
"""
Extract topics from crawl_result JSON data.
Args:
crawl_result: Dictionary containing crawled website data
Returns:
List of extracted topics (max 15)
"""
topics = []
if not crawl_result:
return topics
try:
# Try to extract from common crawl result structures
# Method 1: Direct topics field
if isinstance(crawl_result.get('topics'), list):
topics.extend(crawl_result['topics'][:10])
# Method 2: Extract from headings
if isinstance(crawl_result.get('headings'), list):
headings = crawl_result['headings']
# Filter out common non-topic headings
filtered_headings = [
h for h in headings[:15]
if h and len(h.strip()) > 3
and h.lower() not in ['home', 'about', 'contact', 'menu', 'navigation', 'footer', 'header']
]
topics.extend(filtered_headings)
# Method 3: Extract from page titles
if isinstance(crawl_result.get('titles'), list):
titles = crawl_result['titles']
topics.extend([t for t in titles[:10] if t and len(t.strip()) > 3])
# Method 4: Extract from content sections
if isinstance(crawl_result.get('sections'), list):
sections = crawl_result['sections']
for section in sections[:10]:
if isinstance(section, dict):
section_title = section.get('title') or section.get('heading')
if section_title and len(section_title.strip()) > 3:
topics.append(section_title)
# Method 5: Extract from metadata
if isinstance(crawl_result.get('metadata'), dict):
meta = crawl_result['metadata']
if meta.get('title'):
topics.append(meta['title'])
if isinstance(meta.get('keywords'), list):
topics.extend(meta['keywords'][:5])
# Remove duplicates and clean
unique_topics = []
seen = set()
for topic in topics:
if topic and isinstance(topic, str):
cleaned = topic.strip()
if cleaned and cleaned.lower() not in seen:
seen.add(cleaned.lower())
unique_topics.append(cleaned)
return unique_topics[:15] # Limit to 15 topics
except Exception as e:
logger.debug(f"Error extracting topics from crawl_result: {e}")
return []
def _extract_keywords_from_crawl(self, crawl_result: Dict[str, Any]) -> List[str]:
"""
Extract keywords from crawl_result JSON data.
Args:
crawl_result: Dictionary containing crawled website data
Returns:
List of extracted keywords (max 20)
"""
keywords = []
if not crawl_result:
return keywords
try:
# Method 1: Direct keywords field
if isinstance(crawl_result.get('keywords'), list):
keywords.extend(crawl_result['keywords'][:15])
# Method 2: Extract from metadata keywords
if isinstance(crawl_result.get('metadata'), dict):
meta = crawl_result['metadata']
if isinstance(meta.get('keywords'), list):
keywords.extend(meta['keywords'][:10])
if meta.get('description'):
# Extract potential keywords from description (simple word extraction)
desc = meta['description']
words = [w.strip() for w in desc.split() if len(w.strip()) > 4]
keywords.extend(words[:5])
# Method 3: Extract from tags
if isinstance(crawl_result.get('tags'), list):
keywords.extend(crawl_result['tags'][:10])
# Method 4: Extract from content (simple frequency-based, if available)
if isinstance(crawl_result.get('content'), str):
content = crawl_result['content']
# Simple extraction: words that appear multiple times and are > 4 chars
words = content.lower().split()
word_freq = {}
for word in words:
cleaned = ''.join(c for c in word if c.isalnum())
if len(cleaned) > 4:
word_freq[cleaned] = word_freq.get(cleaned, 0) + 1
# Get top keywords by frequency
sorted_words = sorted(word_freq.items(), key=lambda x: x[1], reverse=True)
keywords.extend([word for word, freq in sorted_words[:10] if freq > 1])
# Remove duplicates and clean
unique_keywords = []
seen = set()
for keyword in keywords:
if keyword and isinstance(keyword, str):
cleaned = keyword.strip().lower()
if cleaned and len(cleaned) > 2 and cleaned not in seen:
seen.add(cleaned)
unique_keywords.append(keyword.strip())
return unique_keywords[:20] # Limit to 20 keywords
except Exception as e:
logger.debug(f"Error extracting keywords from crawl_result: {e}")
return []
def _extract_writing_patterns(self, style_patterns: Dict[str, Any]) -> List[str]:
"""
Extract writing patterns from style_patterns JSON data.
Args:
style_patterns: Dictionary containing writing patterns analysis
Returns:
List of extracted patterns (max 10)
"""
patterns = []
if not style_patterns:
return patterns
try:
# Method 1: Direct patterns field
if isinstance(style_patterns.get('patterns'), list):
patterns.extend(style_patterns['patterns'][:10])
# Method 2: Common patterns field
if isinstance(style_patterns.get('common_patterns'), list):
patterns.extend(style_patterns['common_patterns'][:10])
# Method 3: Writing patterns field
if isinstance(style_patterns.get('writing_patterns'), list):
patterns.extend(style_patterns['writing_patterns'][:10])
# Method 4: Content structure patterns
if isinstance(style_patterns.get('content_structure'), dict):
structure = style_patterns['content_structure']
if isinstance(structure.get('patterns'), list):
patterns.extend(structure['patterns'][:5])
# Method 5: Extract from analysis field
if isinstance(style_patterns.get('analysis'), dict):
analysis = style_patterns['analysis']
if isinstance(analysis.get('identified_patterns'), list):
patterns.extend(analysis['identified_patterns'][:10])
# Normalize patterns (lowercase, remove duplicates)
normalized_patterns = []
seen = set()
for pattern in patterns:
if pattern and isinstance(pattern, str):
cleaned = pattern.strip().lower().replace('_', '-').replace(' ', '-')
if cleaned and cleaned not in seen:
seen.add(cleaned)
normalized_patterns.append(cleaned)
return normalized_patterns[:10] # Limit to 10 patterns
except Exception as e:
logger.debug(f"Error extracting writing patterns: {e}")
return []
def _extract_style_guidelines(self, style_guidelines: Dict[str, Any]) -> List[str]:
"""
Extract style guidelines from style_guidelines JSON data.
Args:
style_guidelines: Dictionary containing generated style guidelines
Returns:
List of extracted guidelines (max 15)
"""
guidelines = []
if not style_guidelines:
return guidelines
try:
# Method 1: Direct guidelines field
if isinstance(style_guidelines.get('guidelines'), list):
guidelines.extend(style_guidelines['guidelines'][:15])
# Method 2: Recommendations field
if isinstance(style_guidelines.get('recommendations'), list):
guidelines.extend(style_guidelines['recommendations'][:15])
# Method 3: Best practices field
if isinstance(style_guidelines.get('best_practices'), list):
guidelines.extend(style_guidelines['best_practices'][:10])
# Method 4: Tone recommendations
if isinstance(style_guidelines.get('tone_recommendations'), list):
guidelines.extend(style_guidelines['tone_recommendations'][:5])
# Method 5: Structure guidelines
if isinstance(style_guidelines.get('structure_guidelines'), list):
guidelines.extend(style_guidelines['structure_guidelines'][:5])
# Method 6: Vocabulary suggestions
if isinstance(style_guidelines.get('vocabulary_suggestions'), list):
guidelines.extend(style_guidelines['vocabulary_suggestions'][:5])
# Method 7: Engagement tips
if isinstance(style_guidelines.get('engagement_tips'), list):
guidelines.extend(style_guidelines['engagement_tips'][:5])
# Method 8: Audience considerations
if isinstance(style_guidelines.get('audience_considerations'), list):
guidelines.extend(style_guidelines['audience_considerations'][:5])
# Method 9: SEO optimization (if available)
if isinstance(style_guidelines.get('seo_optimization'), list):
guidelines.extend(style_guidelines['seo_optimization'][:3])
# Method 10: Conversion optimization (if available)
if isinstance(style_guidelines.get('conversion_optimization'), list):
guidelines.extend(style_guidelines['conversion_optimization'][:3])
# Remove duplicates and clean
unique_guidelines = []
seen = set()
for guideline in guidelines:
if guideline and isinstance(guideline, str):
cleaned = guideline.strip()
# Normalize for comparison (lowercase, remove extra spaces)
normalized = ' '.join(cleaned.lower().split())
if cleaned and normalized not in seen and len(cleaned) > 5:
seen.add(normalized)
unique_guidelines.append(cleaned)
return unique_guidelines[:15] # Limit to 15 guidelines
except Exception as e:
logger.debug(f"Error extracting style guidelines: {e}")
return []
def get_json_schema(self) -> Dict[str, Any]:
    """Return the JSON schema used to request a structured LLM response.

    The schema is derived from the ``ResearchPersona`` Pydantic model and
    is passed to ``llm_text_gen(json_struct=...)`` so the provider returns
    JSON matching the persona structure.

    Returns:
        JSON-schema dictionary for ``ResearchPersona``.
    """
    # Imported lazily to keep the prompt builder importable without the
    # models package at module-import time. (Previously also imported
    # ResearchPreset, which was never used — removed.)
    from models.research_persona_models import ResearchPersona
    # NOTE(review): `.schema()` is the Pydantic v1 API; on Pydantic v2 the
    # equivalent is `model_json_schema()` — confirm the pinned version.
    return ResearchPersona.schema()

View File

@@ -0,0 +1,194 @@
"""
Research Persona Scheduler
Handles scheduled generation of research personas after onboarding.
"""
from datetime import datetime, timedelta, timezone
from typing import Dict, Any
from loguru import logger
from services.database import get_db_session
from services.research.research_persona_service import ResearchPersonaService
from models.scheduler_models import SchedulerEventLog
async def generate_research_persona_task(user_id: str):
    """
    Generate a research persona for a user as a scheduled background task.

    Called by the scheduler (normally ~20 minutes after onboarding
    completion, see ``schedule_research_persona_generation``). The task is
    deliberately fire-and-forget: failures are logged to the scheduler
    event log for dashboard visibility but are NEVER retried, because each
    generation attempt makes an expensive LLM API call.

    Args:
        user_id: User ID (Clerk string)
    """
    db = None
    try:
        logger.info(f"Scheduled research persona generation started for user {user_id}")
        # Get database session (owned by this task; closed in `finally`).
        db = get_db_session()
        if not db:
            logger.error(f"Failed to get database session for research persona generation (user: {user_id})")
            return
        # Generate research persona
        persona_service = ResearchPersonaService(db_session=db)
        # Idempotency guard: if a persona already exists, skip entirely so
        # no unnecessary (expensive) API call is made. Freshness/TTL is not
        # checked here — any existing persona short-circuits the task.
        persona_data = persona_service._get_persona_data_record(user_id)
        if persona_data and persona_data.research_persona:
            logger.info(f"Research persona already exists for user {user_id}, skipping generation")
            return
        # NOTE(review): event_date below uses naive datetime.utcnow() while
        # the scheduler creates timezone-aware run dates — confirm the
        # SchedulerEventLog column expects naive UTC.
        start_time = datetime.utcnow()
        try:
            research_persona = persona_service.get_or_generate(user_id, force_refresh=False)
            execution_time = (datetime.utcnow() - start_time).total_seconds()
            if research_persona:
                logger.info(f"✅ Scheduled research persona generation completed for user {user_id}")
                # Log success to scheduler event log for dashboard; logging
                # failures are non-fatal (warn + rollback only).
                try:
                    event_log = SchedulerEventLog(
                        event_type='job_completed',
                        event_date=start_time,
                        job_id=f"research_persona_{user_id}",
                        job_type='one_time',
                        user_id=user_id,
                        event_data={
                            'job_function': 'generate_research_persona_task',
                            'execution_time_seconds': execution_time,
                            'status': 'success'
                        }
                    )
                    db.add(event_log)
                    db.commit()
                except Exception as log_error:
                    logger.warning(f"Failed to log persona generation success to scheduler event log: {log_error}")
                    if db:
                        db.rollback()
            else:
                # get_or_generate returned None: the generation pipeline ran
                # (API call consumed) but produced nothing usable.
                error_msg = (
                    f"Scheduled research persona generation FAILED for user {user_id}. "
                    f"Expensive API call was made but generation failed. "
                    f"Will NOT automatically retry to prevent wasteful API calls."
                )
                logger.error(f"{error_msg}")
                # Log failure to scheduler event log for dashboard visibility
                try:
                    event_log = SchedulerEventLog(
                        event_type='job_failed',
                        event_date=start_time,
                        job_id=f"research_persona_{user_id}",
                        job_type='one_time',
                        user_id=user_id,
                        error_message=error_msg,
                        event_data={
                            'job_function': 'generate_research_persona_task',
                            'execution_time_seconds': execution_time,
                            'status': 'failed',
                            'failure_reason': 'generation_returned_none',
                            'expensive_api_call': True
                        }
                    )
                    db.add(event_log)
                    db.commit()
                except Exception as log_error:
                    logger.warning(f"Failed to log persona generation failure to scheduler event log: {log_error}")
                    if db:
                        db.rollback()
                # DO NOT reschedule - this prevents infinite retry loops
                # User can manually trigger generation from frontend if needed
        except Exception as gen_error:
            # Generation raised (includes re-raised HTTPExceptions such as
            # 429 subscription limits bubbling out of the service layer).
            execution_time = (datetime.utcnow() - start_time).total_seconds()
            error_msg = (
                f"Exception during scheduled research persona generation for user {user_id}: {str(gen_error)}. "
                f"Expensive API call may have been made. Will NOT automatically retry."
            )
            logger.error(f"{error_msg}")
            # Log exception to scheduler event log for dashboard visibility
            try:
                event_log = SchedulerEventLog(
                    event_type='job_failed',
                    event_date=start_time,
                    job_id=f"research_persona_{user_id}",  # Match scheduled job ID format
                    job_type='one_time',
                    user_id=user_id,
                    error_message=error_msg,
                    event_data={
                        'job_function': 'generate_research_persona_task',
                        'execution_time_seconds': execution_time,
                        'status': 'failed',
                        'failure_reason': 'exception',
                        'exception_type': type(gen_error).__name__,
                        'exception_message': str(gen_error),
                        'expensive_api_call': True
                    }
                )
                db.add(event_log)
                db.commit()
            except Exception as log_error:
                logger.warning(f"Failed to log persona generation exception to scheduler event log: {log_error}")
                if db:
                    db.rollback()
            # DO NOT reschedule - prevent infinite retry loops
    except Exception as e:
        # Catch-all so a background task never propagates into the scheduler.
        logger.error(f"Error in scheduled research persona generation for user {user_id}: {e}")
    finally:
        # Always release the DB session this task opened.
        if db:
            try:
                db.close()
            except Exception as e:
                logger.error(f"Error closing database session: {e}")
def schedule_research_persona_generation(user_id: str, delay_minutes: int = 20) -> str:
    """Queue a one-time scheduler job that builds the user's research persona.

    Args:
        user_id: User ID (Clerk string)
        delay_minutes: Delay in minutes before generating persona (default: 20)

    Returns:
        The scheduler job ID.

    Raises:
        Exception: Propagates any scheduling failure after logging it.
    """
    try:
        # Imported lazily to avoid a circular import with the scheduler package.
        from services.scheduler import get_scheduler

        # Timezone-aware UTC run time: now + requested delay.
        run_date = datetime.now(timezone.utc) + timedelta(minutes=delay_minutes)
        # Stable job id (no timestamp suffix) so a restart can find and
        # restore the job with its original scheduled time. Clerk user ids
        # already carry the "user_" prefix, so none is added here.
        job_id = f"research_persona_{user_id}"
        scheduled_job_id = get_scheduler().schedule_one_time_task(
            func=generate_research_persona_task,
            run_date=run_date,
            job_id=job_id,
            kwargs={"user_id": user_id},
            replace_existing=True,
        )
        logger.info(
            f"Scheduled research persona generation for user {user_id} "
            f"at {run_date} (job_id: {scheduled_job_id})"
        )
        return scheduled_job_id
    except Exception as e:
        logger.error(f"Failed to schedule research persona generation for user {user_id}: {e}")
        raise

View File

@@ -0,0 +1,421 @@
"""
Research Persona Service
Handles generation, caching, and retrieval of AI-powered research personas.
"""
from typing import Dict, Any, Optional
from datetime import datetime, timedelta
from loguru import logger
from fastapi import HTTPException
from services.database import get_db_session
from models.onboarding import PersonaData, OnboardingSession
from models.research_persona_models import ResearchPersona
from .research_persona_prompt_builder import ResearchPersonaPromptBuilder
from services.llm_providers.main_text_generation import llm_text_gen
from services.onboarding.database_service import OnboardingDatabaseService
from services.persona_data_service import PersonaDataService
class ResearchPersonaService:
    """Service for generating, caching, and retrieving research personas.

    Wraps the full lifecycle: collecting onboarding data, prompting the
    LLM for a structured persona, validating it, and persisting it on the
    user's ``PersonaData`` row with a time-based cache.
    """
    # Cached personas are considered fresh for this many days.
    CACHE_TTL_DAYS = 7  # 7-day cache TTL
    def __init__(self, db_session=None):
        # Reuse an injected session (e.g. request-scoped) or open a new one.
        self.db = db_session or get_db_session()
        # Builds the LLM prompt and JSON schema for persona generation.
        self.prompt_builder = ResearchPersonaPromptBuilder()
        # Data-access helpers share this service's DB session.
        self.onboarding_service = OnboardingDatabaseService(db=self.db)
        self.persona_data_service = PersonaDataService(db_session=self.db)
def get_cached_only(
    self,
    user_id: str
) -> Optional[ResearchPersona]:
    """Return the user's research persona from cache only — never generate.

    Intended for config endpoints: makes no LLM call and therefore never
    triggers rate-limit / subscription checks.

    Args:
        user_id: User ID (Clerk string)

    Returns:
        The cached ``ResearchPersona`` when present and within TTL,
        otherwise ``None``.
    """
    try:
        record = self._get_persona_data_record(user_id)
        if not record:
            logger.debug(f"No persona data found for user {user_id}")
            return None
        # Serve the cached persona only when both the TTL holds and the
        # stored JSON payload exists.
        if self.is_cache_valid(record) and record.research_persona:
            try:
                logger.debug(f"Returning cached research persona for user {user_id}")
                return ResearchPersona(**record.research_persona)
            except Exception as parse_error:
                logger.warning(f"Failed to parse cached research persona: {parse_error}")
                return None
        # Stale or absent — deliberately do NOT generate here.
        logger.debug(f"No valid cached research persona for user {user_id}")
        return None
    except Exception as e:
        logger.error(f"Error getting cached research persona for user {user_id}: {e}")
        return None
def get_or_generate(
    self,
    user_id: str,
    force_refresh: bool = False
) -> Optional[ResearchPersona]:
    """
    Get the user's research persona, generating one if missing or expired.

    Serves the cached persona when it is within TTL and parseable;
    otherwise (or when ``force_refresh`` is set) runs a fresh LLM-backed
    generation and persists the result.

    Args:
        user_id: User ID (Clerk string)
        force_refresh: If True, regenerate even if cache is valid

    Returns:
        ResearchPersona if successful, None otherwise

    Raises:
        HTTPException: Re-raised unchanged (e.g. 429 subscription limits)
            so API callers see the proper status code.
    """
    try:
        # Get persona data record
        persona_data = self._get_persona_data_record(user_id)
        if not persona_data:
            logger.warning(f"No persona data found for user {user_id}, cannot generate research persona")
            return None
        # Check cache if not forcing refresh
        if not force_refresh and self.is_cache_valid(persona_data):
            if persona_data.research_persona:
                logger.info(f"Using cached research persona for user {user_id}")
                try:
                    return ResearchPersona(**persona_data.research_persona)
                except Exception as e:
                    # Corrupt/outdated cached JSON: don't fail — regenerate.
                    logger.warning(f"Failed to parse cached research persona: {e}, regenerating...")
                    # Fall through to regeneration
            else:
                logger.info(f"Research persona missing for user {user_id}, generating...")
        else:
            if force_refresh:
                logger.info(f"Forcing refresh of research persona for user {user_id}")
            else:
                logger.info(f"Cache expired for user {user_id}, regenerating...")
        # Generate new research persona (expensive LLM call).
        try:
            research_persona = self.generate_research_persona(user_id)
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
            raise
        if research_persona:
            # Save to database; a save failure is logged but the freshly
            # generated persona is still returned to the caller.
            if self.save_research_persona(user_id, research_persona):
                logger.info(f"✅ Research persona generated and saved for user {user_id}")
            else:
                logger.warning(f"Failed to save research persona for user {user_id}")
            return research_persona
        else:
            # Log detailed error for debugging expensive failures
            logger.error(
                f"❌ Failed to generate research persona for user {user_id} - "
                f"This is an expensive failure (API call consumed). Check logs above for details."
            )
            # Don't return None silently - let the caller know this failed
            return None
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
        raise
    except Exception as e:
        logger.error(f"Error getting/generating research persona for user {user_id}: {e}")
        return None
def generate_research_persona(self, user_id: str) -> Optional[ResearchPersona]:
    """
    Generate a new research persona for the user via a structured LLM call.

    Pipeline: collect onboarding data -> build prompt + JSON schema ->
    call ``llm_text_gen`` -> parse/validate the response into a
    ``ResearchPersona``. Does NOT persist the result (see
    ``save_research_persona``).

    Args:
        user_id: User ID (Clerk string)

    Returns:
        ResearchPersona if successful, None otherwise

    Raises:
        HTTPException: Propagated from the LLM layer (and RuntimeErrors
            from subscription limits are converted to HTTP 429).
    """
    try:
        logger.info(f"Generating research persona for user {user_id}")
        # Collect onboarding data
        onboarding_data = self._collect_onboarding_data(user_id)
        if not onboarding_data:
            logger.warning(f"Insufficient onboarding data for user {user_id}")
            return None
        # Build prompt
        prompt = self.prompt_builder.build_research_persona_prompt(onboarding_data)
        # Get JSON schema for structured response
        json_schema = self.prompt_builder.get_json_schema()
        # Call LLM with structured JSON response
        logger.info(f"Calling LLM for research persona generation (user: {user_id})")
        try:
            response_text = llm_text_gen(
                prompt=prompt,
                json_struct=json_schema,
                user_id=user_id
            )
        except HTTPException:
            # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
            logger.warning(f"HTTPException during LLM call for user {user_id} - re-raising")
            raise
        except RuntimeError as e:
            # Re-raise RuntimeError (subscription limits) as HTTPException
            logger.warning(f"RuntimeError during LLM call for user {user_id}: {e}")
            raise HTTPException(status_code=429, detail=str(e))
        if not response_text:
            logger.error("Empty response from LLM")
            return None
        # Parse JSON response
        import json
        try:
            # When json_struct is provided, llm_text_gen may return a dict directly
            if isinstance(response_text, dict):
                # Already parsed, use directly
                persona_dict = response_text
            elif isinstance(response_text, str):
                # Handle case where LLM returns markdown-wrapped JSON or plain JSON string
                # (strips leading ```json / ``` fences and a trailing ``` fence).
                response_text = response_text.strip()
                if response_text.startswith("```json"):
                    response_text = response_text[7:]
                if response_text.startswith("```"):
                    response_text = response_text[3:]
                if response_text.endswith("```"):
                    response_text = response_text[:-3]
                response_text = response_text.strip()
                persona_dict = json.loads(response_text)
            else:
                logger.error(f"Unexpected response type from LLM: {type(response_text)}")
                return None
            # Add generated_at timestamp (naive-UTC ISO string) before validation.
            persona_dict["generated_at"] = datetime.utcnow().isoformat()
            # Validate and create ResearchPersona
            # Log the dict structure for debugging if validation fails
            try:
                research_persona = ResearchPersona(**persona_dict)
                logger.info(f"✅ Research persona generated successfully for user {user_id}")
                return research_persona
            except Exception as validation_error:
                logger.error(f"Failed to validate ResearchPersona from dict: {validation_error}")
                logger.debug(f"Persona dict keys: {list(persona_dict.keys()) if isinstance(persona_dict, dict) else 'Not a dict'}")
                logger.debug(f"Persona dict sample: {str(persona_dict)[:500]}")
                # Re-raise to be caught by outer exception handler
                raise
        except json.JSONDecodeError as e:
            logger.error(f"Failed to parse LLM response as JSON: {e}")
            logger.debug(f"Response text: {response_text[:500] if isinstance(response_text, str) else str(response_text)[:500]}")
            return None
        except Exception as e:
            # Also catches the re-raised validation error above.
            logger.error(f"Failed to create ResearchPersona from response: {e}")
            return None
    except HTTPException:
        # Re-raise HTTPExceptions (e.g., 429 subscription limit) so they propagate to API
        raise
    except Exception as e:
        logger.error(f"Error generating research persona for user {user_id}: {e}")
        return None
def is_cache_valid(self, persona_data: PersonaData) -> bool:
    """Report whether the stored research persona is still within its TTL.

    Args:
        persona_data: ``PersonaData`` row holding the generation timestamp.

    Returns:
        True when the persona was generated less than ``CACHE_TTL_DAYS``
        days ago; False when the timestamp is missing or expired.
    """
    generated_at = persona_data.research_persona_generated_at
    if not generated_at:
        # Never generated — nothing to serve from cache.
        return False
    # Naive-UTC arithmetic, matching how the timestamp is written.
    age = datetime.utcnow() - generated_at
    if age >= timedelta(days=self.CACHE_TTL_DAYS):
        logger.debug(f"Cache expired (age: {age.days} days, TTL: {self.CACHE_TTL_DAYS} days)")
        return False
    return True
def save_research_persona(
    self,
    user_id: str,
    research_persona: ResearchPersona
) -> bool:
    """
    Persist a research persona on the user's ``PersonaData`` row.

    Stores the persona as JSON and stamps the generation time, which
    ``is_cache_valid`` later uses for TTL checks.

    Args:
        user_id: User ID (Clerk string)
        research_persona: ResearchPersona to save

    Returns:
        True if successful, False otherwise (transaction rolled back).
    """
    try:
        persona_data = self._get_persona_data_record(user_id)
        if not persona_data:
            logger.error(f"No persona data record found for user {user_id}")
            return False
        # Convert ResearchPersona to dict for JSON storage
        # NOTE(review): `.dict()` is the Pydantic v1 API; on Pydantic v2
        # this is `model_dump()` — confirm the pinned version.
        persona_dict = research_persona.dict()
        # Update database record (naive-UTC timestamp, matching is_cache_valid).
        persona_data.research_persona = persona_dict
        persona_data.research_persona_generated_at = datetime.utcnow()
        self.db.commit()
        logger.info(f"✅ Research persona saved for user {user_id}")
        return True
    except Exception as e:
        logger.error(f"Error saving research persona for user {user_id}: {e}")
        self.db.rollback()
        return False
def _get_persona_data_record(self, user_id: str) -> Optional[PersonaData]:
    """Fetch the user's ``PersonaData`` ORM row via their onboarding session.

    Args:
        user_id: User ID (Clerk string)

    Returns:
        The ``PersonaData`` row, or None when the user has no onboarding
        session / persona row or a query error occurs.
    """
    try:
        # Ensure research_persona columns exist before querying
        # (lazy schema migration for older databases).
        self.onboarding_service._ensure_research_persona_columns(self.db)
        # Get onboarding session — PersonaData is keyed by session, not user.
        session = self.db.query(OnboardingSession).filter(
            OnboardingSession.user_id == user_id
        ).first()
        if not session:
            return None
        # Get persona data
        persona_data = self.db.query(PersonaData).filter(
            PersonaData.session_id == session.id
        ).first()
        return persona_data
    except Exception as e:
        logger.error(f"Error getting persona data record for user {user_id}: {e}")
        return None
def _collect_onboarding_data(self, user_id: str) -> Optional[Dict[str, Any]]:
    """
    Collect all onboarding data needed for research persona generation.

    Aggregates website analysis, persona data, research preferences, and
    competitor analysis, and derives a ``business_info`` dict (industry /
    target audience) with fallbacks: persona data -> website analysis ->
    inferred defaults (flagged via ``business_info['inferred']``).

    Args:
        user_id: User ID (Clerk string)

    Returns:
        Dictionary with website_analysis, persona_data, research_preferences,
        business_info, and competitor_analysis; None when no usable data
        exists at all or on error.
    """
    try:
        # Get website analysis
        website_analysis = self.onboarding_service.get_website_analysis(user_id, self.db) or {}
        # Get persona data
        persona_data_dict = self.onboarding_service.get_persona_data(user_id, self.db) or {}
        # Get research preferences
        research_prefs = self.onboarding_service.get_research_preferences(user_id, self.db) or {}
        # Get business info - construct from persona data and website analysis
        business_info = {}
        # Try to extract from persona data (accepts both key spellings).
        if persona_data_dict:
            core_persona = persona_data_dict.get('corePersona') or persona_data_dict.get('core_persona')
            if core_persona:
                if core_persona.get('industry'):
                    business_info['industry'] = core_persona['industry']
                if core_persona.get('target_audience'):
                    business_info['target_audience'] = core_persona['target_audience']
        # Fallback to website analysis if not in persona
        if not business_info.get('industry') and website_analysis:
            target_audience_data = website_analysis.get('target_audience', {})
            if isinstance(target_audience_data, dict):
                industry_focus = target_audience_data.get('industry_focus')
                if industry_focus:
                    business_info['industry'] = industry_focus
                demographics = target_audience_data.get('demographics')
                if demographics:
                    # Coerce non-string demographics (e.g. dict/list) to str.
                    business_info['target_audience'] = demographics if isinstance(demographics, str) else str(demographics)
        # Check if we have enough data - be more lenient since we can infer from minimal data
        # We need at least some basic information to generate a meaningful persona
        has_basic_data = bool(
            website_analysis or
            persona_data_dict or
            research_prefs.get('content_types') or
            business_info.get('industry')
        )
        if not has_basic_data:
            logger.warning(f"Insufficient onboarding data for user {user_id} - no basic data found")
            return None
        # If we have minimal data, add intelligent defaults to help the AI
        if not business_info.get('industry'):
            # Try to infer industry from research preferences or content types
            content_types = research_prefs.get('content_types', [])
            if 'blog' in content_types or 'article' in content_types:
                business_info['industry'] = 'Content Marketing'
                business_info['inferred'] = True
            elif 'social_media' in content_types:
                business_info['industry'] = 'Social Media Marketing'
                business_info['inferred'] = True
            elif 'video' in content_types:
                business_info['industry'] = 'Video Content Creation'
                business_info['inferred'] = True
        if not business_info.get('target_audience'):
            # Default to professionals for content creators
            business_info['target_audience'] = 'Professionals and content consumers'
            business_info['inferred'] = True
        # Get competitor analysis data (if available); failures here are
        # non-fatal — the persona can be generated without competitors.
        competitor_analysis = None
        try:
            competitor_analysis = self.onboarding_service.get_competitor_analysis(user_id, self.db)
            if competitor_analysis:
                logger.info(f"Found {len(competitor_analysis)} competitors for research persona generation")
        except Exception as e:
            logger.debug(f"Could not retrieve competitor analysis for persona generation: {e}")
        return {
            "website_analysis": website_analysis,
            "persona_data": persona_data_dict,
            "research_preferences": research_prefs,
            "business_info": business_info,
            "competitor_analysis": competitor_analysis  # Add competitor data for better preset generation
        }
    except Exception as e:
        logger.error(f"Error collecting onboarding data for user {user_id}: {e}")
        return None

View File

@@ -0,0 +1,425 @@
"""
Tavily API Service for ALwrity
This service provides web search and research capabilities using the Tavily API,
which offers AI-powered search with real-time information retrieval.
Key Features:
- Web search with AI-powered results
- Content extraction and summarization
- Real-time information retrieval
- Topic-based search (general, news, finance)
- Advanced search depth options
- Cost-effective API usage with caching
Dependencies:
- aiohttp (for async HTTP requests)
- os (for environment variables)
- logging (for debugging)
Author: ALwrity Team
Version: 1.0
Last Updated: January 2025
"""
import asyncio
import json
import os
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Union
from urllib.parse import urlparse

import aiohttp
from loguru import logger
class TavilyService:
"""
Service for web search and research using the Tavily API.
This service provides AI-powered search capabilities to find relevant
content and information for research purposes.
"""
def __init__(self):
    """Set up the Tavily client, deferring key validation until first use.

    In production, API keys may be injected per-request by middleware, so a
    missing ``TAVILY_API_KEY`` at construction time is not fatal: the service
    simply starts disabled and re-attempts initialization lazily.
    """
    self.base_url = "https://api.tavily.com"
    self.enabled = False
    self.api_key = os.getenv("TAVILY_API_KEY")
    self._try_initialize()
def _try_initialize(self) -> None:
"""Attempt to (re)initialize the Tavily service from current environment."""
if self.enabled and self.api_key:
return
try:
self.api_key = os.getenv("TAVILY_API_KEY")
if not self.api_key:
# Leave disabled; caller may try again after middleware injection
logger.warning("TAVILY_API_KEY not configured; Tavily service will be disabled")
self.enabled = False
return
self.enabled = True
logger.info("Tavily Service initialized successfully")
except Exception as e:
logger.error(f"Failed to initialize Tavily service: {e}")
self.enabled = False
async def search(
    self,
    query: str,
    topic: str = "general",
    search_depth: str = "basic",
    max_results: int = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    include_answer: Union[bool, str] = False,
    include_raw_content: Union[bool, str] = False,
    include_images: bool = False,
    include_image_descriptions: bool = False,
    include_favicon: bool = False,
    time_range: Optional[str] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
    country: Optional[str] = None,
    chunks_per_source: int = 3,
    auto_parameters: bool = False
) -> Dict[str, Any]:
    """
    Execute a search query using the Tavily API.

    Args:
        query: The search query to execute.
        topic: Category of search (general, news, finance).
        search_depth: Depth of search (basic = 1 credit, advanced = 2 credits).
        max_results: Maximum number of results to return (clamped to 20).
        include_domains: Domains to specifically include (Tavily caps at 300).
        exclude_domains: Domains to specifically exclude (Tavily caps at 150).
        include_answer: Include LLM-generated answer (basic/advanced/true/false).
        include_raw_content: Include raw content (markdown/text/true/false).
        include_images: Include image search results.
        include_image_descriptions: Include image descriptions.
        include_favicon: Include favicon URLs.
        time_range: Time range filter (day, week, month, year, d, w, m, y).
        start_date: Start date filter (YYYY-MM-DD).
        end_date: End date filter (YYYY-MM-DD).
        country: Country boost; only honored by Tavily for the "general" topic.
        chunks_per_source: Max chunks per source (1-3, advanced search only).
        auto_parameters: Let Tavily auto-configure parameters from the query.

    Returns:
        On success: dict with "success": True plus query, answer, results,
        images, timing metadata and a UTC timestamp.
        On failure: dict with "success": False plus "error" and "details".
    """
    try:
        # Pick up any per-request injected key before deciding we're disabled.
        self._try_initialize()
        if not self.enabled:
            raise ValueError("Tavily Service is not enabled - API key missing")
        logger.info(f"Starting Tavily search for: {query}")
        # Required fields; max_results is clamped to Tavily's hard limit.
        payload = {
            "api_key": self.api_key,
            "query": query,
            "topic": topic,
            "search_depth": search_depth,
            "max_results": min(max_results, 20),  # Tavily limit
            "include_favicon": include_favicon
        }
        # Optional fields are only sent when the caller asked for them.
        if include_domains:
            payload["include_domains"] = include_domains[:300]  # Tavily limit
        if exclude_domains:
            payload["exclude_domains"] = exclude_domains[:150]  # Tavily limit
        if include_answer:
            payload["include_answer"] = include_answer
        if include_raw_content:
            payload["include_raw_content"] = include_raw_content
        if include_images:
            payload["include_images"] = include_images
        if include_image_descriptions:
            payload["include_image_descriptions"] = include_image_descriptions
        if time_range:
            payload["time_range"] = time_range
        if start_date:
            payload["start_date"] = start_date
        if end_date:
            payload["end_date"] = end_date
        if country and topic == "general":
            # Tavily only honors the country boost for the "general" topic.
            payload["country"] = country
        if search_depth == "advanced" and 1 <= chunks_per_source <= 3:
            # chunks_per_source is only meaningful for advanced searches.
            payload["chunks_per_source"] = chunks_per_source
        if auto_parameters:
            payload["auto_parameters"] = True
        # Make the API request.
        async with aiohttp.ClientSession() as session:
            async with session.post(
                f"{self.base_url}/search",
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=aiohttp.ClientTimeout(total=60)
            ) as response:
                if response.status == 200:
                    result = await response.json()
                    logger.info(f"Tavily search completed successfully. Found {len(result.get('results', []))} results.")
                    # Process and structure results.
                    processed_results = self._process_search_results(result, query)
                    return {
                        "success": True,
                        "query": result.get("query", query),
                        "answer": result.get("answer"),  # present if include_answer was requested
                        "results": processed_results,
                        "images": result.get("images", []),
                        "response_time": result.get("response_time"),
                        "request_id": result.get("request_id"),
                        "auto_parameters": result.get("auto_parameters"),
                        "total_results": len(processed_results),
                        "timestamp": datetime.utcnow().isoformat()
                    }
                else:
                    error_text = await response.text()
                    logger.error(f"Tavily API error: {response.status} - {error_text}")
                    raise RuntimeError(f"Tavily API error: {response.status} - {error_text}")
    except asyncio.TimeoutError:
        # BUG FIX: this previously caught aiohttp.ClientTimeout, which is the
        # timeout *configuration* dataclass, not an exception type; a real
        # timeout surfaces as asyncio.TimeoutError (aiohttp's timeout errors
        # subclass it), and catching a non-exception class raises TypeError.
        logger.error("Tavily API request timed out")
        return {
            "success": False,
            "error": "Request timed out",
            "details": "The search request took too long to complete"
        }
    except Exception as e:
        logger.error(f"Error in Tavily search: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "details": "An unexpected error occurred during search"
        }
def _process_search_results(self, api_response: Dict[str, Any], query: str) -> List[Dict[str, Any]]:
"""
Process and structure Tavily API response into standardized format.
Args:
api_response: Raw response from Tavily API
query: Original search query
Returns:
List of processed search results
"""
results = []
raw_results = api_response.get("results", [])
for result in raw_results:
try:
# Extract domain from URL
url = result.get("url", "")
domain = urlparse(url).netloc if url else ""
# Calculate relevance score (Tavily provides score field)
relevance_score = result.get("score", 0.5)
processed_result = {
"url": url,
"domain": domain,
"title": result.get("title", ""),
"content": result.get("content", ""),
"raw_content": result.get("raw_content"), # If include_raw_content was requested
"score": relevance_score,
"relevance_score": relevance_score, # Alias for compatibility
"favicon": result.get("favicon"),
"published_date": result.get("published_date"),
}
results.append(processed_result)
except Exception as e:
logger.warning(f"Error processing Tavily result: {str(e)}")
continue
# Sort by relevance score (highest first)
results.sort(key=lambda x: x.get("relevance_score", 0), reverse=True)
return results
async def search_industry_trends(
    self,
    topic: str,
    industry: str,
    max_results: int = 10,
    search_depth: str = "basic"
) -> Dict[str, Any]:
    """
    Search for current industry trends and insights.

    Args:
        topic: The specific topic to research.
        industry: The industry context for the search.
        max_results: Maximum number of search results to return.
        search_depth: Depth of search (basic or advanced).

    Returns:
        The standard ``search`` response dictionary.
    """
    # Fold the industry context into a trend-focused query.
    trend_query = f"{topic} {industry} trends insights"
    # Basic searches lean on the "news" topic for freshness; advanced
    # searches fall back to "general" for broader coverage.
    chosen_topic = "news" if search_depth == "basic" else "general"
    return await self.search(
        query=trend_query,
        topic=chosen_topic,
        search_depth=search_depth,
        max_results=max_results,
        include_answer="basic",
        include_favicon=True,
        time_range="month",  # restrict to the last month for current trends
    )
async def discover_competitors(
    self,
    user_url: str,
    num_results: int = 10,
    include_domains: Optional[List[str]] = None,
    exclude_domains: Optional[List[str]] = None,
    industry_context: Optional[str] = None,
    website_analysis_data: Optional[Dict[str, Any]] = None
) -> Dict[str, Any]:
    """
    Discover competitors for a given website using Tavily search.

    Args:
        user_url: The website URL to find competitors for.
        num_results: Number of competitor results to return.
        include_domains: Domains to include in the search.
        exclude_domains: Domains to exclude from the search (never mutated).
        industry_context: Industry context for better competitor discovery.
        website_analysis_data: Optional prior website analysis; its primary
            audience insight, when present, is folded into the search query.

    Returns:
        On success: dict with "success": True, a "competitors" list and
        analysis metadata.
        On failure: dict with "success": False plus "error" and "details".
    """
    try:
        # Pick up any per-request injected key before deciding we're disabled.
        self._try_initialize()
        if not self.enabled:
            raise ValueError("Tavily Service is not enabled - API key missing")
        logger.info(f"Starting competitor discovery for: {user_url}")
        # Always exclude the user's own domain from the results.
        # BUG FIX: copy the caller's list instead of appending to it in place
        # (the old code mutated the exclude_domains argument).
        user_domain = urlparse(user_url).netloc
        exclude_domains_list = list(exclude_domains) if exclude_domains else []
        exclude_domains_list.append(user_domain)
        # Build the search query from static terms plus optional context.
        query_parts = ["similar websites", "competitors"]
        if industry_context:
            query_parts.append(f"in {industry_context}")
        # Fold in audience insight from a prior website analysis, if present.
        if website_analysis_data:
            analysis = website_analysis_data.get('analysis', {})
            if 'target_audience' in analysis:
                audience = analysis['target_audience']
                if isinstance(audience, dict) and 'primary_audience' in audience:
                    query_parts.append(audience['primary_audience'])
        search_query = " ".join(query_parts)
        # Advanced depth gives better competitor coverage (costs 2 credits).
        search_result = await self.search(
            query=search_query,
            topic="general",
            search_depth="advanced",
            max_results=num_results,
            include_domains=include_domains,
            exclude_domains=exclude_domains_list,
            include_favicon=True,
            chunks_per_source=3
        )
        if not search_result.get("success"):
            # Propagate the failure payload untouched.
            return search_result
        # Re-shape raw search hits into the competitor schema.
        competitors = []
        for result in search_result.get("results", []):
            competitor_data = {
                "url": result.get("url"),
                "domain": result.get("domain"),
                "title": result.get("title"),
                "summary": result.get("content", ""),
                "relevance_score": result.get("relevance_score", 0.5),
                "favicon": result.get("favicon"),
                "published_date": result.get("published_date"),
                "highlights": self._extract_highlights(result.get("content", "")),
                "competitive_insights": self._extract_competitive_insights(result),
                "content_insights": self._analyze_content_quality(result)
            }
            competitors.append(competitor_data)
        logger.info(f"Successfully discovered {len(competitors)} competitors for {user_url}")
        return {
            "success": True,
            "user_url": user_url,
            "competitors": competitors,
            "total_competitors": len(competitors),
            "analysis_timestamp": datetime.utcnow().isoformat(),
            "industry_context": industry_context,
            "request_id": search_result.get("request_id")
        }
    except Exception as e:
        logger.error(f"Error in competitor discovery: {str(e)}")
        return {
            "success": False,
            "error": str(e),
            "details": "An unexpected error occurred during competitor discovery"
        }
def _extract_highlights(self, content: str, num_sentences: int = 3) -> List[str]:
"""Extract key highlights from content."""
if not content:
return []
# Simple sentence extraction (can be enhanced with NLP)
sentences = [s.strip() for s in content.split('.') if s.strip()]
return sentences[:num_sentences]
def _extract_competitive_insights(self, result: Dict[str, Any]) -> Dict[str, Any]:
"""Extract competitive insights from search result."""
content = result.get("content", "")
title = result.get("title", "")
return {
"business_model": "unknown",
"target_audience": "unknown",
"key_differentiators": []
}
def _analyze_content_quality(self, result: Dict[str, Any]) -> Dict[str, Any]:
"""Analyze content quality metrics."""
content = result.get("content", "")
return {
"content_focus": "general",
"content_quality": "medium",
"publishing_frequency": "unknown"
}