Base code

backend/services/research/exa_service.py (new file, 794 additions)

@@ -0,0 +1,794 @@
"""
|
||||
Exa API Service for ALwrity
|
||||
|
||||
This service provides competitor discovery and analysis using the Exa API,
|
||||
which uses neural search to find semantically similar websites and content.
|
||||
|
||||
Key Features:
|
||||
- Competitor discovery using neural search
|
||||
- Content analysis and summarization
|
||||
- Competitive intelligence gathering
|
||||
- Cost-effective API usage with caching
|
||||
- Integration with onboarding Step 3
|
||||
|
||||
Dependencies:
|
||||
- aiohttp (for async HTTP requests)
|
||||
- os (for environment variables)
|
||||
- logging (for debugging)
|
||||
|
||||
Author: ALwrity Team
|
||||
Version: 1.0
|
||||
Last Updated: January 2025
|
||||
"""
|
||||
|
||||
import os
|
||||
import json
|
||||
import asyncio
|
||||
from typing import Dict, List, Optional, Any, Union
|
||||
from datetime import datetime, timedelta
|
||||
from loguru import logger
|
||||
from urllib.parse import urlparse
|
||||
from exa_py import Exa
|
||||
|
||||
class ExaService:
    """
    Service for competitor discovery and analysis using the Exa API.

    This service provides neural search capabilities to find semantically similar
    websites and analyze their content for competitive intelligence.
    """

    def __init__(self):
        """Initialize the Exa Service with API credentials."""
        self.api_key = os.getenv("EXA_API_KEY")
        self.exa = None
        self.enabled = False

        # Don't assume the key is available at import time in production.
        # Keys may be injected per-request via middleware, so defer init.
        self._try_initialize()

    def _try_initialize(self) -> None:
        """Attempt to (re)initialize the Exa SDK from the current environment."""
        if self.enabled and self.exa:
            return
        try:
            self.api_key = os.getenv("EXA_API_KEY")
            if not self.api_key:
                # Leave disabled; caller may try again after middleware injection
                logger.warning("EXA_API_KEY not configured; Exa service will be disabled")
                self.enabled = False
                self.exa = None
                return
            self.exa = Exa(api_key=self.api_key)
            self.enabled = True
            logger.info("Exa Service initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize Exa service: {e}")
            self.enabled = False
            self.exa = None

    async def discover_competitors(
        self,
        user_url: str,
        num_results: int = 10,
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        industry_context: Optional[str] = None,
        website_analysis_data: Optional[Dict[str, Any]] = None
    ) -> Dict[str, Any]:
        """
        Discover competitors for a given website using Exa's neural search.

        Args:
            user_url: The website URL to find competitors for
            num_results: Number of competitor results to return (capped at 10 per request)
            include_domains: List of domains to include in search
            exclude_domains: List of domains to exclude from search
            industry_context: Industry context for better competitor discovery
            website_analysis_data: Optional website analysis output used to sharpen targeting

        Returns:
            Dictionary containing competitor analysis results
        """
        try:
            # Ensure we pick up any per-request injected key
            self._try_initialize()
            if not self.enabled:
                raise ValueError("Exa Service is not enabled - API key missing")

            logger.info(f"Starting competitor discovery for: {user_url}")

            # Extract the user's domain for exclusion; copy the list so we
            # don't mutate the caller's exclude_domains argument
            user_domain = urlparse(user_url).netloc
            exclude_domains_list = list(exclude_domains) if exclude_domains else []
            exclude_domains_list.append(user_domain)

            logger.info(f"Excluding domains: {exclude_domains_list}")

            # Extract insights from website analysis for better targeting
            include_text_queries = []
            summary_query = f"Business model, target audience, content strategy{f' in {industry_context}' if industry_context else ''}"

            if website_analysis_data:
                analysis = website_analysis_data.get('analysis', {})

                # Extract key business terms from the analysis
                if 'target_audience' in analysis:
                    audience = analysis['target_audience']
                    if isinstance(audience, dict) and 'primary_audience' in audience:
                        primary_audience = audience['primary_audience']
                        if isinstance(primary_audience, str) and len(primary_audience.split()) <= 5:  # Exa limit
                            include_text_queries.append(primary_audience)

                # Use industry context from the analysis
                if 'industry' in analysis and analysis['industry']:
                    industry = analysis['industry']
                    if isinstance(industry, str) and len(industry.split()) <= 5:
                        include_text_queries.append(industry)

                # Enhance the summary query with analysis insights
                if 'content_type' in analysis:
                    content_type = analysis['content_type']
                    summary_query += f", {content_type} content strategy"

                logger.info(f"Enhanced targeting with analysis data: {include_text_queries}")

            # Use the Exa SDK to find similar links with content and context
            search_result = self.exa.find_similar_and_contents(
                url=user_url,
                num_results=min(num_results, 10),  # Exa API limit
                include_domains=include_domains,
                exclude_domains=exclude_domains_list,
                include_text=include_text_queries if include_text_queries else None,
                text=True,
                highlights={
                    "numSentences": 2,
                    "highlightsPerUrl": 3,
                    "query": "Unique value proposition, competitive advantages, market position"
                },
                summary={
                    "query": summary_query
                }
            )

            # TODO: Add context generation once the SDK supports it.
            # For now, _generate_basic_context can build one from the results.
            context_result = None

            # Log a summary of the raw Exa API response (avoiding verbose markdown content)
            cost = getattr(search_result, 'cost_dollars', None)
            logger.info(f"📊 Exa API response for {user_url}:")
            logger.info(f" ├─ Request ID: {getattr(search_result, 'request_id', 'N/A')}")
            logger.info(f" ├─ Results count: {len(getattr(search_result, 'results', []))}")
            logger.info(f" └─ Cost: ${getattr(cost, 'total', 0) if cost else 0}")

            # Note: the full raw response contains verbose markdown content, so only
            # a summary is logged. To see the full response, set EXA_DEBUG=true.

            # Extract results from the search
            results = getattr(search_result, 'results', [])
            logger.info(f" - Found {len(results)} competitors")

            # Process and structure the results
            competitors = self._process_competitor_results(search_result, user_url)

            logger.info(f"Successfully discovered {len(competitors)} competitors for {user_url}")

            return {
                "success": True,
                "user_url": user_url,
                "competitors": competitors,
                "total_competitors": len(competitors),
                "analysis_timestamp": datetime.utcnow().isoformat(),
                "industry_context": industry_context,
                "api_cost": getattr(cost, 'total', 0) if cost else 0,
                "request_id": getattr(search_result, 'request_id', None)
            }

        except asyncio.TimeoutError:
            logger.error("Exa API request timed out")
            return {
                "success": False,
                "error": "Request timed out",
                "details": "The competitor discovery request took too long to complete"
            }

        except Exception as e:
            logger.error(f"Error in competitor discovery: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "details": "An unexpected error occurred during competitor discovery"
            }

    def _process_competitor_results(self, search_result, user_url: str) -> List[Dict[str, Any]]:
        """
        Process and structure the Exa SDK response into competitor data.

        Args:
            search_result: Response from the Exa SDK
            user_url: Original user URL for reference

        Returns:
            List of processed competitor data
        """
        competitors = []
        user_domain = urlparse(user_url).netloc

        # Extract results from the SDK response
        results = getattr(search_result, 'results', [])

        for result in results:
            try:
                # Extract basic information from the result object
                competitor_url = getattr(result, 'url', '')
                competitor_domain = urlparse(competitor_url).netloc

                # Skip if it's the same domain as the user
                if competitor_domain == user_domain:
                    continue

                # Extract content insights
                summary = getattr(result, 'summary', '')
                highlights = getattr(result, 'highlights', [])
                highlight_scores = getattr(result, 'highlight_scores', [])

                # Calculate competitive relevance score
                relevance_score = self._calculate_relevance_score(result, user_url)

                competitor_data = {
                    "url": competitor_url,
                    "domain": competitor_domain,
                    "title": getattr(result, 'title', ''),
                    "published_date": getattr(result, 'published_date', None),
                    "author": getattr(result, 'author', None),
                    "favicon": getattr(result, 'favicon', None),
                    "image": getattr(result, 'image', None),
                    "summary": summary,
                    "highlights": highlights,
                    "highlight_scores": highlight_scores,
                    "relevance_score": relevance_score,
                    "competitive_insights": self._extract_competitive_insights(summary, highlights),
                    "content_analysis": self._analyze_content_quality(result)
                }

                competitors.append(competitor_data)

            except Exception as e:
                logger.warning(f"Error processing competitor result: {str(e)}")
                continue

        # Sort by relevance score (highest first)
        competitors.sort(key=lambda x: x["relevance_score"], reverse=True)

        return competitors

    def _calculate_relevance_score(self, result, user_url: str) -> float:
        """
        Calculate a relevance score for competitor ranking.

        The component weights (0.4 + 0.3 + 0.2 + 0.1) sum to 1.0, so the
        score naturally falls in the [0, 1] range.

        Args:
            result: Competitor result from the Exa SDK
            user_url: Original user URL

        Returns:
            Relevance score between 0 and 1
        """
        score = 0.0

        # Base score from highlight scores
        highlight_scores = getattr(result, 'highlight_scores', [])
        if highlight_scores:
            score += sum(highlight_scores) / len(highlight_scores) * 0.4

        # Score from summary quality
        summary = getattr(result, 'summary', '')
        if summary and len(summary) > 100:
            score += 0.3

        # Score from title relevance
        title = getattr(result, 'title', '').lower()
        if any(keyword in title for keyword in ["business", "company", "service", "solution", "platform"]):
            score += 0.2

        # Score from URL structure similarity
        competitor_url = getattr(result, 'url', '')
        if self._url_structure_similarity(user_url, competitor_url) > 0.5:
            score += 0.1

        return min(score, 1.0)

    def _url_structure_similarity(self, url1: str, url2: str) -> float:
        """
        Calculate URL structure similarity.

        Args:
            url1: First URL
            url2: Second URL

        Returns:
            Similarity score between 0 and 1
        """
        try:
            parsed1 = urlparse(url1)
            parsed2 = urlparse(url2)

            # Compare path structure
            path1_parts = [part for part in parsed1.path.split('/') if part]
            path2_parts = [part for part in parsed2.path.split('/') if part]

            if not path1_parts or not path2_parts:
                return 0.0

            # Calculate similarity based on path length and structure
            max_parts = max(len(path1_parts), len(path2_parts))
            common_parts = sum(1 for p1, p2 in zip(path1_parts, path2_parts) if p1 == p2)

            return common_parts / max_parts

        except Exception:
            return 0.0

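    # Worked example (hypothetical URLs): comparing "https://a.com/blog/post"
    # with "https://b.com/blog/other" yields path segments ["blog", "post"] and
    # ["blog", "other"]; 1 of max(2, 2) segments match, so the score is 0.5.
    # Note that _calculate_relevance_score adds its 0.1 bonus only when this
    # score strictly exceeds 0.5.
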
    def _extract_competitive_insights(self, summary: str, highlights: List[str]) -> Dict[str, Any]:
        """
        Extract competitive insights from summary and highlights.

        Args:
            summary: Content summary
            highlights: Content highlights

        Returns:
            Dictionary of competitive insights
        """
        insights = {
            "business_model": "",
            "target_audience": "",
            "value_proposition": "",
            "competitive_advantages": [],
            "content_strategy": ""
        }

        # Combine summary and highlights for analysis
        content = f"{summary} {' '.join(highlights)}".lower()

        # Extract business model indicators
        business_models = ["saas", "platform", "service", "product", "consulting", "agency", "marketplace"]
        for model in business_models:
            if model in content:
                insights["business_model"] = model.title()
                break

        # Extract target audience indicators
        audiences = ["enterprise", "small business", "startups", "developers", "marketers", "consumers"]
        for audience in audiences:
            if audience in content:
                insights["target_audience"] = audience.title()
                break

        # Extract the value proposition from the first highlight, truncated to 100 chars
        if highlights:
            insights["value_proposition"] = (highlights[0][:100] + "...") if len(highlights[0]) > 100 else highlights[0]

        return insights

    def _analyze_content_quality(self, result) -> Dict[str, Any]:
        """
        Analyze the content quality of a competitor.

        Args:
            result: Competitor result from the Exa SDK

        Returns:
            Dictionary of content quality metrics
        """
        quality_metrics = {
            "content_depth": "medium",
            "technical_sophistication": "medium",
            "content_freshness": "unknown",
            "engagement_potential": "medium"
        }

        # Analyze content depth from summary length
        summary = getattr(result, 'summary', '')
        if len(summary) > 300:
            quality_metrics["content_depth"] = "high"
        elif len(summary) < 100:
            quality_metrics["content_depth"] = "low"

        # Analyze technical sophistication
        technical_keywords = ["api", "integration", "automation", "analytics", "data", "platform"]
        highlights = getattr(result, 'highlights', [])
        content_text = f"{summary} {' '.join(highlights)}".lower()

        technical_count = sum(1 for keyword in technical_keywords if keyword in content_text)
        if technical_count >= 3:
            quality_metrics["technical_sophistication"] = "high"
        elif technical_count == 0:
            quality_metrics["technical_sophistication"] = "low"

        return quality_metrics

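    # Worked example (hypothetical content): a summary and highlights that
    # together mention "api", "analytics", and "platform" hit 3 of the 6
    # technical keywords, so technical_sophistication is rated "high";
    # zero keyword hits rates "low", and anything in between stays "medium".
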
    async def discover_social_media_accounts(self, user_url: str) -> Dict[str, Any]:
        """
        Discover social media accounts for a given website using Exa's answer API.

        Args:
            user_url: The website URL to find social media accounts for

        Returns:
            Dictionary containing social media discovery results
        """
        try:
            # Ensure we pick up any per-request injected key
            self._try_initialize()
            if not self.enabled:
                raise ValueError("Exa Service is not enabled - API key missing")

            logger.info(f"Starting social media discovery for: {user_url}")

            # Extract the domain from the URL for better targeting
            domain = urlparse(user_url).netloc.replace('www.', '')

            # Use Exa's answer API to find social media accounts
            result = self.exa.answer(
                f"Find all social media accounts of the url: {domain}. Return a JSON object with facebook, twitter, instagram, linkedin, youtube, and tiktok fields containing the URLs or empty strings if not found.",
                model="exa-pro",
                text=True
            )

            # Log a summary of the raw Exa API response for debugging
            cost = getattr(result, 'cost_dollars', None)
            logger.info(f"Raw Exa social media response for {user_url}:")
            logger.info(f" - Request ID: {getattr(result, 'request_id', 'N/A')}")
            logger.info(f" └─ Cost: ${getattr(cost, 'total', 0) if cost else 0}")
            # Note: the full raw response contains verbose content, so only a
            # summary is logged. To see the full response, set EXA_DEBUG=true.

            # Extract social media data
            answer_text = getattr(result, 'answer', '')
            citations = getattr(result, 'citations', [])

            # Convert AnswerResult objects to dictionaries for JSON serialization
            citations_dicts = []
            for citation in citations:
                if hasattr(citation, '__dict__'):
                    citations_dicts.append({
                        'id': getattr(citation, 'id', ''),
                        'title': getattr(citation, 'title', ''),
                        'url': getattr(citation, 'url', ''),
                        'text': getattr(citation, 'text', ''),
                        'snippet': getattr(citation, 'snippet', ''),
                        'published_date': getattr(citation, 'published_date', None),
                        'author': getattr(citation, 'author', None),
                        'image': getattr(citation, 'image', None),
                        'favicon': getattr(citation, 'favicon', None)
                    })
                else:
                    # If it's already a dict, use it as-is
                    citations_dicts.append(citation)

            logger.info(f" - Raw answer text: {answer_text}")
            logger.info(f" - Citations count: {len(citations_dicts)}")

            # Parse the answer, which may be JSON or markdown with linked URLs
            platform_labels = {
                "facebook": "Facebook",
                "twitter": "Twitter",
                "instagram": "Instagram",
                "linkedin": "LinkedIn",
                "youtube": "YouTube",
                "tiktok": "TikTok"
            }
            answer_data = {platform: "" for platform in platform_labels}
            try:
                if answer_text.strip().startswith('{'):
                    # Direct JSON format
                    answer_data = json.loads(answer_text.strip())
                else:
                    # Markdown format: extract each platform's URL with a regex
                    for platform, label in platform_labels.items():
                        match = re.search(rf'{label}.*?\[([^\]]+)\]', answer_text)
                        if match:
                            answer_data[platform] = match.group(1)
            except (json.JSONDecodeError, AttributeError, KeyError):
                # If parsing fails, fall back to the empty structure
                answer_data = {platform: "" for platform in platform_labels}

            logger.info(" - Parsed social media accounts:")
            for platform, url in answer_data.items():
                if url:
                    logger.info(f"   {platform}: {url}")

            return {
                "success": True,
                "user_url": user_url,
                "social_media_accounts": answer_data,
                "citations": citations_dicts,
                "analysis_timestamp": datetime.utcnow().isoformat(),
                "api_cost": getattr(cost, 'total', 0) if cost else 0,
                "request_id": getattr(result, 'request_id', None)
            }

        except Exception as e:
            logger.error(f"Error in social media discovery: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "details": "An unexpected error occurred during social media discovery"
            }

    def _generate_basic_context(self, results: List[Any], user_url: str) -> str:
        """
        Generate a basic context string from competitor results for LLM consumption.

        Not yet wired into discover_competitors (see the TODO there).

        Args:
            results: List of competitor results from the Exa API
            user_url: Original user URL for reference

        Returns:
            Formatted context string
        """
        context_parts = [
            f"Competitive Analysis for: {user_url}",
            f"Found {len(results)} similar websites/competitors:",
            ""
        ]

        for i, result in enumerate(results[:5], 1):  # Limit to top 5 for context
            url = getattr(result, 'url', 'Unknown URL')
            title = getattr(result, 'title', 'Unknown Title')
            summary = getattr(result, 'summary', 'No summary available')

            context_parts.extend([
                f"{i}. {title}",
                f"   URL: {url}",
                f"   Summary: {summary[:200]}{'...' if len(summary) > 200 else ''}",
                ""
            ])

        context_parts.append("Key insights:")
        context_parts.append("- These competitors offer similar services or content")
        context_parts.append("- Analyze their content strategy and positioning")
        context_parts.append("- Identify opportunities for differentiation")

        return "\n".join(context_parts)

    async def analyze_competitor_content(
        self,
        competitor_url: str,
        analysis_depth: str = "standard"
    ) -> Dict[str, Any]:
        """
        Perform deeper analysis of a specific competitor.

        Args:
            competitor_url: URL of the competitor to analyze
            analysis_depth: Depth of analysis ("quick", "standard", "deep");
                currently recorded in the result but does not change the analysis

        Returns:
            Dictionary containing detailed competitor analysis
        """
        try:
            logger.info(f"Starting detailed analysis for competitor: {competitor_url}")

            # Get similar content from this competitor
            similar_results = await self.discover_competitors(
                competitor_url,
                num_results=10,
                include_domains=[urlparse(competitor_url).netloc]
            )

            if not similar_results["success"]:
                return similar_results

            # Analyze content patterns
            content_patterns = self._analyze_content_patterns(similar_results["competitors"])

            # Generate competitive insights
            competitive_insights = self._generate_competitive_insights(
                competitor_url,
                similar_results["competitors"],
                content_patterns
            )

            return {
                "success": True,
                "competitor_url": competitor_url,
                "content_patterns": content_patterns,
                "competitive_insights": competitive_insights,
                "analysis_timestamp": datetime.utcnow().isoformat(),
                "analysis_depth": analysis_depth
            }

        except Exception as e:
            logger.error(f"Error in competitor content analysis: {str(e)}")
            return {
                "success": False,
                "error": str(e),
                "details": "An unexpected error occurred during competitor analysis"
            }

    def _analyze_content_patterns(self, competitors: List[Dict[str, Any]]) -> Dict[str, Any]:
        """
        Analyze content patterns across competitors.

        Args:
            competitors: List of competitor data

        Returns:
            Dictionary of content patterns
        """
        patterns = {
            "common_themes": [],
            "content_types": [],
            "publishing_patterns": {},
            "target_keywords": [],
            "content_strategies": []
        }

        # Collect summaries for theme analysis; this would be enhanced
        # with NLP analysis in a full implementation
        all_summaries = [comp.get("summary", "") for comp in competitors]

        # Infer content types from URL paths
        content_types = set()
        for comp in competitors:
            url = comp.get("url", "")
            if "/blog/" in url:
                content_types.add("blog")
            elif "/product/" in url or "/service/" in url:
                content_types.add("product")
            elif "/about/" in url:
                content_types.add("about")
            elif "/contact/" in url:
                content_types.add("contact")

        patterns["content_types"] = list(content_types)

        return patterns

    def _generate_competitive_insights(
        self,
        competitor_url: str,
        competitors: List[Dict[str, Any]],
        content_patterns: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Generate competitive insights from analysis data.

        Args:
            competitor_url: URL of the competitor
            competitors: List of competitor data
            content_patterns: Content pattern analysis

        Returns:
            Dictionary of competitive insights
        """
        insights = {
            "competitive_strengths": [],
            "content_opportunities": [],
            "market_positioning": "unknown",
            "strategic_recommendations": []
        }

        # Analyze competitive strengths
        for comp in competitors:
            if comp.get("relevance_score", 0) > 0.7:
                insights["competitive_strengths"].append({
                    "strength": comp.get("summary", "")[:100],
                    "relevance": comp.get("relevance_score", 0)
                })

        # Generate content opportunities
        if content_patterns.get("content_types"):
            insights["content_opportunities"] = [
                f"Develop {content_type} content"
                for content_type in content_patterns["content_types"]
            ]

        return insights

    def health_check(self) -> Dict[str, Any]:
        """
        Check the health of the Exa service.

        Returns:
            Dictionary containing service health status
        """
        try:
            # Ensure the latest environment before the health check
            self._try_initialize()
            if not self.enabled:
                return {
                    "status": "disabled",
                    "message": "Exa API key not configured",
                    "timestamp": datetime.utcnow().isoformat()
                }

            # Test with a simple request using the SDK directly
            self.exa.find_similar(
                url="https://example.com",
                num_results=1
            )

            # If we get here without an exception, the API is working
            return {
                "status": "healthy",
                "message": "Exa API is operational",
                "timestamp": datetime.utcnow().isoformat(),
                "test_successful": True
            }

        except Exception as e:
            return {
                "status": "error",
                "message": f"Health check failed: {str(e)}",
                "timestamp": datetime.utcnow().isoformat()
            }

    def get_cost_estimate(self, num_results: int, include_content: bool = True) -> Dict[str, Any]:
        """
        Get a cost estimate for Exa API usage.

        Args:
            num_results: Number of results requested
            include_content: Whether to include content analysis

        Returns:
            Dictionary containing the cost estimate
        """
        # Exa API pricing (as of documentation)
        if num_results <= 25:
            search_cost = 0.005
        elif num_results <= 100:
            search_cost = 0.025
        else:
            search_cost = 1.0

        content_cost = 0.0
        if include_content:
            # Estimate content analysis cost
            content_cost = num_results * 0.001  # Rough estimate

        total_cost = search_cost + content_cost

        return {
            "search_cost": search_cost,
            "content_cost": content_cost,
            "total_estimated_cost": total_cost,
            "num_results": num_results,
            "include_content": include_content
        }

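
# --- Usage sketch (illustrative, not part of the service) ---
# A minimal smoke test of this module's own methods, assuming EXA_API_KEY is
# set in the environment and the exa_py package is installed. The URLs below
# are placeholders; if the key is missing, each call returns a failure dict
# rather than raising.
if __name__ == "__main__":
    async def _demo() -> None:
        service = ExaService()

        # Service status and a worked cost estimate:
        # 10 results -> 0.005 search + 10 * 0.001 content = 0.015 total (estimate)
        print(service.health_check())
        print(service.get_cost_estimate(num_results=10))

        # Competitor discovery: results are sorted by relevance_score
        competitors = await service.discover_competitors("https://example.com", num_results=5)
        if competitors["success"]:
            for comp in competitors["competitors"][:3]:
                print(comp["domain"], comp["relevance_score"])

        # Social media discovery via the answer API
        socials = await service.discover_social_media_accounts("https://example.com")
        print(socials.get("social_media_accounts"))

    asyncio.run(_demo())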