ALwrity/backend/services/research/exa_content_research.py

"""
Exa Content Research Provider

Shared Exa neural search provider for content research across ALwrity modules.
Provides simple_search() for fact-checking, content grounding, and research.

Used by:
- LinkedIn Writer (content generation research)
- Blog Writer (fact-checking and writing assistance)

This is the content-research variant. For competitor discovery/analysis,
use ExaService in exa_service.py.
"""

import os
import asyncio
from typing import List, Dict, Any, Optional
from loguru import logger


class ExaContentResearchProvider:
    """Exa neural search provider for content research."""

    def __init__(self):
        """Initialize the Exa content research provider."""
        self.api_key = os.getenv("EXA_API_KEY")
        if not self.api_key:
            raise RuntimeError("EXA_API_KEY not configured")

        from exa_py import Exa
        self.exa = Exa(self.api_key)
        logger.info("✅ Exa Content Research Provider initialized")

    async def simple_search(
        self,
        query: str,
        num_results: int = 5,
        user_id: str = None,
        include_domains: List[str] = None,
        exclude_domains: List[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Simple Exa search for content research and fact-checking.
        Handles subscription preflight check and usage tracking.

        Args:
            query: Search query string
            num_results: Number of results to return (default 5)
            user_id: Optional user ID for subscription checking
            include_domains: Only return results from these domains
            exclude_domains: Exclude results from these domains

        Returns:
            List of source dicts with title, url, text, publishedDate, author, score keys

        Raises:
            HTTPException(429): If user has exceeded subscription limits
            Exception: If Exa API key not configured or search fails
        """
        # Preflight subscription check
        if user_id:
            from models.subscription_models import APIProvider
            from services.subscription import PricingService
            from services.database import get_session_for_user
            from fastapi import HTTPException

            db = get_session_for_user(user_id)
            if db:
                try:
                    pricing_service = PricingService(db)
                    can_proceed, message, usage_info = pricing_service.check_usage_limits(
                        user_id=user_id,
                        provider=APIProvider.EXA,
                        tokens_requested=0,
                        actual_provider_name="exa",
                    )
                    if not can_proceed:
                        raise HTTPException(status_code=429, detail={
                            'error': 'insufficient_balance',
                            'message': message,
                            'provider': 'exa',
                            'usage_info': usage_info or {}
                        })
                except HTTPException:
                    raise
                except Exception as e:
                    logger.warning(f"[Exa simple_search] Preflight check failed: {e}")
                finally:
                    try:
                        db.close()
                    except Exception:
                        pass

        search_kwargs = {
            "type": "auto",
            "num_results": num_results,
            "text": {"max_characters": 1000},
            "highlights": {"num_sentences": 2, "highlights_per_url": 2},
        }
        if include_domains:
            search_kwargs["include_domains"] = include_domains
        if exclude_domains:
            search_kwargs["exclude_domains"] = exclude_domains

        try:
            loop = asyncio.get_running_loop()
            results = await loop.run_in_executor(
                None,
                lambda: self.exa.search_and_contents(query, **search_kwargs),
            )
        except Exception as e:
            logger.error(f"[Exa simple_search] API call failed: {e}")
            # Retry with simpler parameters
            retry_kwargs = {"type": "auto", "num_results": num_results, "text": True}
            if include_domains:
                retry_kwargs["include_domains"] = include_domains
            if exclude_domains:
                retry_kwargs["exclude_domains"] = exclude_domains
            try:
                logger.info("[Exa simple_search] Retrying with simplified parameters")
                results = await loop.run_in_executor(
                    None,
                    lambda: self.exa.search_and_contents(query, **retry_kwargs),
                )
            except Exception as retry_error:
                logger.error(f"[Exa simple_search] Retry also failed: {retry_error}")
                raise RuntimeError(f"Exa search failed: {str(retry_error)}") from retry_error

        sources = []
        for result in results.results:
            sources.append({
                'title': getattr(result, 'title', 'Untitled'),
                'url': getattr(result, 'url', ''),
                'text': getattr(result, 'text', ''),
                'publishedDate': getattr(result, 'publishedDate', ''),
                'author': getattr(result, 'author', ''),
                'score': (lambda v: v if v is not None else 0.5)(getattr(result, 'score', 0.5)),
            })

        # Track usage
        if user_id:
            cost = 0.005  # ~0.5 cents per search
            try:
                self.track_usage(user_id, cost)
            except Exception as e:
                logger.warning(f"[Exa simple_search] Failed to track usage: {e}")

        logger.info(f"[Exa simple_search] Found {len(sources)} sources for query: {query[:80]}...")
        return sources

    def track_usage(self, user_id: str, cost: float):
        """Track Exa API usage after successful call."""
        from services.database import get_session_for_user
        from services.subscription import PricingService
        from sqlalchemy import text

        db = get_session_for_user(user_id)
        if not db:
            logger.warning(f"[track_usage] Could not get DB session for user {user_id}")
            return
        try:
            pricing_service = PricingService(db)
            current_period = pricing_service.get_current_billing_period(user_id)

            # Update exa_calls and exa_cost via SQL UPDATE
            update_query = text("""
                UPDATE usage_summaries
                SET exa_calls = COALESCE(exa_calls, 0) + 1,
                    exa_cost = COALESCE(exa_cost, 0) + :cost,
                    total_calls = total_calls + 1,
                    total_cost = total_cost + :cost
                WHERE user_id = :user_id AND billing_period = :period
            """)
            db.execute(update_query, {
                'cost': cost,
                'user_id': user_id,
                'period': current_period
            })
            db.commit()

            logger.info(f"[Exa] Tracked usage: user={user_id}, cost=${cost}")
        except Exception as e:
            logger.error(f"[Exa] Failed to track usage: {e}")
            db.rollback()
        finally:
            db.close()


# Global singleton instance
_exa_content_provider: Optional[ExaContentResearchProvider] = None


def get_exa_content_provider() -> ExaContentResearchProvider:
    """Get or create the global Exa content research provider instance."""
    global _exa_content_provider
    if _exa_content_provider is None:
        _exa_content_provider = ExaContentResearchProvider()
    return _exa_content_provider